// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE	SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC	50000		/* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC	25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC	25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000	/* 3s */
#define GAUDI2_RESET_POLL_CNT	3
#define GAUDI2_RESET_WAIT_MSEC	1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT	512
#define GAUDI2_CB_POOL_CB_SIZE	SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC	25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000	/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT	3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
 * and relies on that value (for array sizes etc.), we define another value
 * for the maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS	1
#define MAX_FAULTY_XBARS	1
#define MAX_FAULTY_EDMAS	1
#define MAX_FAULTY_DECODERS	1

#define GAUDI2_TPC_FULL_MASK	0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK	0xFFFF
#define GAUDI2_DECODER_FULL_MASK	0x3FF

#define GAUDI2_NA_EVENT_CAUSE	0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE	18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE	25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE	3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE	14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE	3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE	2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE	22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE	30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE	25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE	16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE	7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE	19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE	9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE	5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT	(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC	10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC	(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC	USEC_PER_SEC

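/*
 * Engine idle-check helpers: each macro below tests the relevant status
 * register value(s) against the corresponding *_IDLE_MASK definitions.
 */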
#define IS_DMA_IDLE(dma_core_idle_ind_mask) \
	(!((dma_core_idle_ind_mask) & \
	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK	0x300
#define DEC_WORK_STATE_IDLE	0
#define DEC_WORK_STATE_PEND	3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE	SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT	26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT	0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT	12
#define MMU_RANGE_INV_VA_MSB_SHIFT	44
#define MMU_RANGE_INV_EN_SHIFT	0
#define MMU_RANGE_INV_ASID_EN_SHIFT	1
#define MMU_RANGE_INV_ASID_SHIFT	2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because it
 * has a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK	GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK	GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN	64

#define GAUDI2_VDEC_MSIX_ENTRIES	(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
					GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

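/*
 * Async event ID reported for errors on each HW queue. Note that all four
 * queues of a given QMAN map to that QMAN's single event ID.
 */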
static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

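/*
 * Cause-name tables for the QMAN error interrupts; the lower-CP and
 * arbiter causes have their own tables further below.
 */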
under min value and clipped", 519 "FENCE 1 dec under min value and clipped", 520 "FENCE 2 dec under min value and clipped", 521 "FENCE 3 dec under min value and clipped", 522 "CPDMA Up overflow", 523 "PQC L2H error" 524 }; 525 526 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = { 527 "RSVD0", 528 "CQ AXI HBW error", 529 "CP AXI HBW error", 530 "CP error due to undefined OPCODE", 531 "CP encountered STOP OPCODE", 532 "CP AXI LBW error", 533 "CP WRREG32 or WRBULK returned error", 534 "N/A", 535 "FENCE 0 inc over max value and clipped", 536 "FENCE 1 inc over max value and clipped", 537 "FENCE 2 inc over max value and clipped", 538 "FENCE 3 inc over max value and clipped", 539 "FENCE 0 dec under min value and clipped", 540 "FENCE 1 dec under min value and clipped", 541 "FENCE 2 dec under min value and clipped", 542 "FENCE 3 dec under min value and clipped", 543 "CPDMA Up overflow", 544 "RSVD17", 545 "CQ_WR_IFIFO_CI_ERR", 546 "CQ_WR_CTL_CI_ERR", 547 "ARC_CQF_RD_ERR", 548 "ARC_CQ_WR_IFIFO_CI_ERR", 549 "ARC_CQ_WR_CTL_CI_ERR", 550 "ARC_AXI_ERR", 551 "CP_SWITCH_WDT_ERR" 552 }; 553 554 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = { 555 "Choice push while full error", 556 "Choice Q watchdog error", 557 "MSG AXI LBW returned with error" 558 }; 559 560 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = { 561 "qm_axi_err", 562 "qm_trace_fence_events", 563 "qm_sw_err", 564 "qm_cp_sw_stop", 565 "lbw_mstr_rresp_err", 566 "lbw_mstr_bresp_err", 567 "lbw_msg_slverr", 568 "hbw_msg_slverr", 569 "wbc_slverr", 570 "hbw_mstr_rresp_err", 571 "hbw_mstr_bresp_err", 572 "sb_resp_intr", 573 "mrsb_resp_intr", 574 "core_dw_status_0", 575 "core_dw_status_1", 576 "core_dw_status_2", 577 "core_dw_status_3", 578 "core_dw_status_4", 579 "core_dw_status_5", 580 "core_dw_status_6", 581 "core_dw_status_7", 582 "async_arc2cpu_sei_intr", 583 }; 584 585 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = { 586 "tpc_address_exceed_slm", 587 "tpc_div_by_0", 588 "tpc_spu_mac_overflow", 589 "tpc_spu_addsub_overflow", 590 "tpc_spu_abs_overflow", 591 "tpc_spu_fma_fp_dst_nan", 592 "tpc_spu_fma_fp_dst_inf", 593 "tpc_spu_convert_fp_dst_nan", 594 "tpc_spu_convert_fp_dst_inf", 595 "tpc_spu_fp_dst_denorm", 596 "tpc_vpu_mac_overflow", 597 "tpc_vpu_addsub_overflow", 598 "tpc_vpu_abs_overflow", 599 "tpc_vpu_convert_fp_dst_nan", 600 "tpc_vpu_convert_fp_dst_inf", 601 "tpc_vpu_fma_fp_dst_nan", 602 "tpc_vpu_fma_fp_dst_inf", 603 "tpc_vpu_fp_dst_denorm", 604 "tpc_assertions", 605 "tpc_illegal_instruction", 606 "tpc_pc_wrap_around", 607 "tpc_qm_sw_err", 608 "tpc_hbw_rresp_err", 609 "tpc_hbw_bresp_err", 610 "tpc_lbw_rresp_err", 611 "tpc_lbw_bresp_err", 612 "st_unlock_already_locked", 613 "invalid_lock_access", 614 "LD_L protection violation", 615 "ST_L protection violation", 616 }; 617 618 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = { 619 "agu_resp_intr", 620 "qman_axi_err", 621 "wap sei (wbc axi err)", 622 "arc sei", 623 "cfg access error", 624 "qm_sw_err", 625 "sbte_dbg_intr_0", 626 "sbte_dbg_intr_1", 627 "sbte_dbg_intr_2", 628 "sbte_dbg_intr_3", 629 "sbte_dbg_intr_4", 630 "sbte_prtn_intr_0", 631 "sbte_prtn_intr_1", 632 "sbte_prtn_intr_2", 633 "sbte_prtn_intr_3", 634 "sbte_prtn_intr_4", 635 }; 636 637 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { 638 "i0", 639 "i1", 640 "i2", 641 "i3", 642 "i4", 643 }; 644 645 static const 
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

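/*
 * QMAN configuration block base address for each HW queue; the four queues
 * of a given QMAN share the same base.
 */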
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};

static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 988 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 989 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 990 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 991 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 992 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 993 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 994 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 995 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 996 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 997 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 998 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 999 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 1000 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1001 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1002 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1003 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1004 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1005 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1006 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1007 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1008 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1009 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1010 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1011 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1012 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1013 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1014 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1015 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1016 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1017 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1018 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1019 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1020 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1021 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1022 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1023 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1024 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1025 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1026 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1027 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1028 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1029 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1030 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1031 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1032 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1033 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1034 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1035 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1036 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1037 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1038 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1039 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1040 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1041 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1042 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE, 1043 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE, 1044 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1045 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE, 1046 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1047 }; 1048 1049 static const 
u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1050 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1051 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1052 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1053 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1054 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1055 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1056 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1057 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1058 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1059 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1060 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1061 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1062 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1063 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1064 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1065 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1066 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1067 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1068 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1069 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1070 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1071 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1072 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1073 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1074 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1075 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1076 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1077 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1078 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1079 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1080 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1081 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1082 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1083 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1084 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1085 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1086 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1087 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1088 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1089 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1090 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1091 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1092 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1093 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1094 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1095 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1096 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1097 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1098 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1099 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1100 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1101 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1102 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1103 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1104 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1105 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1106 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1107 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE, 1108 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1109 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE, 1110 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1111 [CPU_ID_NIC_QMAN_ARC16] = 
mmNIC8_QM_DCCM0_BASE, 1112 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1113 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1114 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1115 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1116 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1117 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1118 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1119 }; 1120 1121 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1122 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1123 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1124 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1125 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1126 }; 1127 1128 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1129 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1130 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1131 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1132 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1133 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1134 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1135 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1136 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1137 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1138 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1139 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1140 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1141 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1142 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1143 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1144 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1145 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1146 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1147 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1148 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1149 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1150 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1151 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1152 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1153 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1154 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1155 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1156 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1157 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1158 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1159 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1160 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1161 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1162 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1163 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1164 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1165 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1166 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1167 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1168 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1169 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5, 1170 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 1171 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1172 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5, 1173 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1174 
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1175 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1176 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1177 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1178 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1179 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1180 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1181 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1182 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1183 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1184 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1185 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1186 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1187 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1188 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1189 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1190 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1191 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1192 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1193 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1194 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1195 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1196 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1197 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1198 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1199 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1200 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1201 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1202 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1203 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1204 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1205 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1206 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1207 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1208 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1209 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1210 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1211 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1212 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1213 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1214 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1215 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1216 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1217 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1218 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1219 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1220 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1221 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1222 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1223 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1224 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1225 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1226 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1227 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12, 1228 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1229 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1230 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = 
CPU_ID_TPC_QMAN_ARC13, 1231 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1232 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1233 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1234 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1235 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1236 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1237 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1238 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1239 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1240 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1241 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1242 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1243 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1244 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1245 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1246 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1247 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1248 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1249 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1250 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1251 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1252 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1253 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1254 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1255 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1256 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1257 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1258 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1259 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1260 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1261 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1262 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1263 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1264 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1265 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1266 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1267 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1268 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1269 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1270 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1271 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1272 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1273 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1274 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1275 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1276 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1282 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23, 1283 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1284 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1285 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0, 1286 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1287 
[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1288 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1289 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1290 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1291 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1292 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1293 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1294 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1295 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1296 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1297 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1298 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1299 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1300 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1301 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1302 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1303 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1304 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1305 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1306 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1307 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1308 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1309 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1310 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1311 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1312 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1313 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1314 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1315 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1316 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1317 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1318 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1319 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1320 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1321 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1322 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1323 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1324 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1325 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1326 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1327 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1328 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1329 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1330 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1331 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1332 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1333 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1334 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1335 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1336 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1337 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1338 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1339 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1340 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1341 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1342 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1343 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1344 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1345 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1346 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1347 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15, 1348 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1349 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1350 [GAUDI2_QUEUE_ID_NIC_16_1] = 
CPU_ID_NIC_QMAN_ARC16, 1351 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1352 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1353 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1354 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1355 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1356 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1357 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1358 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1359 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1360 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1361 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1362 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1363 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1364 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1365 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1366 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1367 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1368 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1369 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1370 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1371 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1372 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1373 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1374 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1375 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1376 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1377 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1378 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1379 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1380 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1381 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1382 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1383 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1384 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1385 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1386 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1387 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1388 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1389 }; 1390 1391 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1392 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1393 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1394 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1395 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1396 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1397 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1398 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1399 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1400 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1401 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1402 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1403 }; 1404 1405 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1406 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1407 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1408 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1409 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1410 }; 1411 1412 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1413 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1414 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1415 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE, 1416 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1417 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE, 1418 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1419 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1420 [TPC_ID_DCORE1_TPC1] = 
mmDCORE1_TPC1_CFG_BASE, 1421 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1422 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1423 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1424 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1425 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1426 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1427 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1428 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1429 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1430 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1431 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1432 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1433 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1434 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1435 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1436 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1437 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1438 }; 1439 1440 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1441 [ROTATOR_ID_0] = mmROT0_BASE, 1442 [ROTATOR_ID_1] = mmROT1_BASE 1443 }; 1444 1445 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1446 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1447 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1448 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1449 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1450 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1451 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1452 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1453 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1454 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1455 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1456 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1457 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0, 1458 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0, 1459 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0, 1460 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0, 1461 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0, 1462 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0, 1463 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0, 1464 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0, 1465 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0, 1466 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0, 1467 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0, 1468 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0, 1469 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0, 1470 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0, 1471 }; 1472 1473 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { 1474 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0, 1475 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, 1476 }; 1477 1478 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1479 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1480 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1481 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1482 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1483 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1484 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1485 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1486 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1487 }; 1488 1489 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1490 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1491 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1492 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1493 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal", 1494 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 
abnormal", 1495 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1496 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1497 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1498 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1499 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1500 }; 1501 1502 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = { 1503 RTR_ID_X_Y(2, 4), 1504 RTR_ID_X_Y(3, 4), 1505 RTR_ID_X_Y(4, 4), 1506 RTR_ID_X_Y(5, 4), 1507 RTR_ID_X_Y(6, 4), 1508 RTR_ID_X_Y(7, 4), 1509 RTR_ID_X_Y(8, 4), 1510 RTR_ID_X_Y(9, 4), 1511 RTR_ID_X_Y(10, 4), 1512 RTR_ID_X_Y(11, 4), 1513 RTR_ID_X_Y(12, 4), 1514 RTR_ID_X_Y(13, 4), 1515 RTR_ID_X_Y(14, 4), 1516 RTR_ID_X_Y(15, 4), 1517 RTR_ID_X_Y(16, 4), 1518 RTR_ID_X_Y(17, 4), 1519 RTR_ID_X_Y(2, 11), 1520 RTR_ID_X_Y(3, 11), 1521 RTR_ID_X_Y(4, 11), 1522 RTR_ID_X_Y(5, 11), 1523 RTR_ID_X_Y(6, 11), 1524 RTR_ID_X_Y(7, 11), 1525 RTR_ID_X_Y(8, 11), 1526 RTR_ID_X_Y(9, 11), 1527 RTR_ID_X_Y(0, 0),/* 24 no id */ 1528 RTR_ID_X_Y(0, 0),/* 25 no id */ 1529 RTR_ID_X_Y(0, 0),/* 26 no id */ 1530 RTR_ID_X_Y(0, 0),/* 27 no id */ 1531 RTR_ID_X_Y(14, 11), 1532 RTR_ID_X_Y(15, 11), 1533 RTR_ID_X_Y(16, 11), 1534 RTR_ID_X_Y(17, 11) 1535 }; 1536 1537 enum rtr_id { 1538 DCORE0_RTR0, 1539 DCORE0_RTR1, 1540 DCORE0_RTR2, 1541 DCORE0_RTR3, 1542 DCORE0_RTR4, 1543 DCORE0_RTR5, 1544 DCORE0_RTR6, 1545 DCORE0_RTR7, 1546 DCORE1_RTR0, 1547 DCORE1_RTR1, 1548 DCORE1_RTR2, 1549 DCORE1_RTR3, 1550 DCORE1_RTR4, 1551 DCORE1_RTR5, 1552 DCORE1_RTR6, 1553 DCORE1_RTR7, 1554 DCORE2_RTR0, 1555 DCORE2_RTR1, 1556 DCORE2_RTR2, 1557 DCORE2_RTR3, 1558 DCORE2_RTR4, 1559 DCORE2_RTR5, 1560 DCORE2_RTR6, 1561 DCORE2_RTR7, 1562 DCORE3_RTR0, 1563 DCORE3_RTR1, 1564 DCORE3_RTR2, 1565 DCORE3_RTR3, 1566 DCORE3_RTR4, 1567 DCORE3_RTR5, 1568 DCORE3_RTR6, 1569 DCORE3_RTR7, 1570 }; 1571 1572 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1573 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1574 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1575 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1576 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1577 DCORE0_RTR0 1578 }; 1579 1580 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1581 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, 1582 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, 1583 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0, 1584 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7, 1585 DCORE0_RTR0 1586 }; 1587 1588 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = { 1589 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1590 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1591 }; 1592 1593 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = { 1594 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1, 1595 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0 1596 }; 1597 1598 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1599 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1600 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1601 }; 1602 1603 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1604 DCORE1_RTR7, DCORE1_RTR7, 
DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1605 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1606 }; 1607 1608 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1609 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1610 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1611 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1612 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1613 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1614 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1615 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1616 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE 1617 }; 1618 1619 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = { 1620 DCORE0_RTR0, DCORE0_RTR0 1621 }; 1622 1623 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = { 1624 DCORE0_RTR2, DCORE0_RTR2 1625 }; 1626 1627 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = { 1628 DCORE2_RTR0, DCORE3_RTR7 1629 }; 1630 1631 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = { 1632 DCORE2_RTR2, DCORE3_RTR5 1633 }; 1634 1635 struct mme_initiators_rtr_id { 1636 u32 wap0; 1637 u32 wap1; 1638 u32 write; 1639 u32 read; 1640 u32 sbte0; 1641 u32 sbte1; 1642 u32 sbte2; 1643 u32 sbte3; 1644 u32 sbte4; 1645 }; 1646 1647 enum mme_initiators { 1648 MME_WAP0 = 0, 1649 MME_WAP1, 1650 MME_WRITE, 1651 MME_READ, 1652 MME_SBTE0, 1653 MME_SBTE1, 1654 MME_SBTE2, 1655 MME_SBTE3, 1656 MME_SBTE4, 1657 MME_INITIATORS_MAX 1658 }; 1659 1660 static const struct mme_initiators_rtr_id 1661 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1662 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1663 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1664 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1665 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1666 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1667 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1668 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1669 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1670 }; 1671 1672 enum razwi_event_sources { 1673 RAZWI_TPC, 1674 RAZWI_MME, 1675 RAZWI_EDMA, 1676 RAZWI_PDMA, 1677 RAZWI_NIC, 1678 RAZWI_DEC, 1679 RAZWI_ROT 1680 }; 1681 1682 struct hbm_mc_error_causes { 1683 u32 mask; 1684 char cause[50]; 1685 }; 1686 1687 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS; 1688 1689 /* Special blocks iterator is currently used to configure security protection bits, 1690 * and read global errors. Most HW blocks are addressable and those who aren't (N/A)- 1691 * must be skipped. Following configurations are commonly used for both PB config 1692 * and global error reading, since currently they both share the same settings. 1693 * Once it changes, we must remember to use separate configurations for either one. 
1694 */ 1695 static int gaudi2_iterator_skip_block_types[] = { 1696 GAUDI2_BLOCK_TYPE_PLL, 1697 GAUDI2_BLOCK_TYPE_EU_BIST, 1698 GAUDI2_BLOCK_TYPE_HBM, 1699 GAUDI2_BLOCK_TYPE_XFT 1700 }; 1701 1702 static struct range gaudi2_iterator_skip_block_ranges[] = { 1703 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */ 1704 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE}, 1705 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE}, 1706 /* Skip all CPU blocks except for CPU_IF */ 1707 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE}, 1708 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE} 1709 }; 1710 1711 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 1712 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 1713 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 1714 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 1715 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 1716 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 1717 }; 1718 1719 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 1720 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 1721 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 1722 [HBM_SEI_READ_ERR] = "SEI read data error", 1723 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 1724 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 1725 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 1726 [HBM_SEI_DFI] = "SEI DFI error", 1727 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 1728 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 1729 }; 1730 1731 struct mmu_spi_sei_cause { 1732 char cause[50]; 1733 int clear_bit; 1734 }; 1735 1736 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 1737 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 1738 {"page access", 1}, /* INTERRUPT_CLR[1] */ 1739 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 1740 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 1741 {"mmu rei0", -1}, /* no clear register bit */ 1742 {"mmu rei1", -1}, /* no clear register bit */ 1743 {"stlb rei0", -1}, /* no clear register bit */ 1744 {"stlb rei1", -1}, /* no clear register bit */ 1745 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 1746 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 1747 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 1748 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 1749 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1750 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1751 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1752 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1753 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 1754 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 1755 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 1756 }; 1757 1758 struct gaudi2_cache_invld_params { 1759 u64 start_va; 1760 u64 end_va; 1761 u32 inv_start_val; 1762 u32 flags; 1763 bool range_invalidation; 1764 }; 1765 1766 struct gaudi2_tpc_idle_data { 1767 struct engines_data *e; 1768 unsigned long *mask; 1769 bool *is_idle; 1770 const char *tpc_fmt; 1771 }; 1772 1773 struct gaudi2_tpc_mmu_data { 1774 u32 rw_asid; 1775 }; 1776 1777 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0}; 1778 1779 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 1780 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 1781 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 1782 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1783 
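/*
 * Purely illustrative sketch, not part of the driver's interrupt flow, and the
 * helper name below is hypothetical: it only demonstrates how the
 * gaudi2_mmu_spi_sei[] table above can be consumed - walk the set bits of a
 * SPI/SEI cause word, report each cause string, and accumulate the matching
 * INTERRUPT_CLR bits, skipping entries whose clear_bit is -1 (no clear
 * register bit).
 */
static inline u32 gaudi2_mmu_spi_sei_cause_to_clr_mask(struct hl_device *hdev, u32 cause)
{
	u32 clr_mask = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
		if (!(cause & BIT(i)))
			continue;

		dev_err_ratelimited(hdev->dev, "MMU SPI/SEI cause: %s\n",
					gaudi2_mmu_spi_sei[i].cause);

		if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
			clr_mask |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
	}

	return clr_mask;
}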
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1784 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 1785 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 1786 bool is_memset); 1787 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 1788 1789 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 1790 { 1791 1792 } 1793 1794 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 1795 { 1796 return sizeof(struct packet_msg_short); 1797 } 1798 1799 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 1800 { 1801 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 1802 } 1803 1804 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 1805 { 1806 struct asic_fixed_properties *prop = &hdev->asic_prop; 1807 int dcore, inst, tpc_seq; 1808 u32 offset; 1809 1810 /* init the return code */ 1811 ctx->rc = 0; 1812 1813 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 1814 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 1815 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 1816 1817 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 1818 continue; 1819 1820 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 1821 1822 ctx->fn(hdev, dcore, inst, offset, ctx); 1823 if (ctx->rc) { 1824 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 1825 dcore, inst); 1826 return; 1827 } 1828 } 1829 } 1830 1831 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 1832 return; 1833 1834 /* special check for PCI TPC (DCORE0_TPC6) */ 1835 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 1836 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 1837 if (ctx->rc) 1838 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 1839 } 1840 1841 static bool gaudi2_host_phys_addr_valid(u64 addr) 1842 { 1843 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 1844 return true; 1845 1846 return false; 1847 } 1848 1849 static int set_number_of_functional_hbms(struct hl_device *hdev) 1850 { 1851 struct asic_fixed_properties *prop = &hdev->asic_prop; 1852 u8 faulty_hbms = hweight64(hdev->dram_binning); 1853 1854 /* check if all HBMs should be used */ 1855 if (!faulty_hbms) { 1856 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 1857 prop->num_functional_hbms = GAUDI2_HBM_NUM; 1858 return 0; 1859 } 1860 1861 /* 1862 * check for error condition in which number of binning 1863 * candidates is higher than the maximum supported by the 1864 * driver (in which case binning mask shall be ignored and driver will 1865 * set the default) 1866 */ 1867 if (faulty_hbms > MAX_FAULTY_HBMS) { 1868 dev_err(hdev->dev, 1869 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 1870 MAX_FAULTY_HBMS, hdev->dram_binning); 1871 return -EINVAL; 1872 } 1873 1874 /* 1875 * by default, number of functional HBMs in Gaudi2 is always 1876 * GAUDI2_HBM_NUM - 1. 
 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
	return 0;
}

static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 basic_hbm_page_size;
	int rc;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return -EINVAL;

	/*
	 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use a
	 * workaround of an x16 bigger page size to be able to populate the entire
	 * HBM mappings in the TLB
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;

	/* since the DRAM page size differs from the DMMU page size we need to allocate
	 * DRAM memory in units of dram_page size and map this memory in
	 * units of DMMU page size. We overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page.
	 * We therefore:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly
	 *    end_addr = start_addr + m * 48M
	 *
	 * the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}

static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;

	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->dmmu.last_mask =
LAST_MASK; 2013 prop->dmmu.host_resident = 1; 2014 /* TODO: will be duplicated until implementing per-MMU props */ 2015 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 2016 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2017 2018 /* 2019 * this is done in order to be able to validate FW descriptor (i.e. validating that 2020 * the addresses and allocated space for FW image does not cross memory bounds). 2021 * for this reason we set the DRAM size to the minimum possible and later it will 2022 * be modified according to what reported in the cpucp info packet 2023 */ 2024 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 2025 2026 hdev->pmmu_huge_range = true; 2027 prop->pmmu.host_resident = 1; 2028 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 2029 prop->pmmu.last_mask = LAST_MASK; 2030 /* TODO: will be duplicated until implementing per-MMU props */ 2031 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 2032 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2033 2034 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 2035 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 2036 prop->hints_host_hpage_reserved_va_range.start_addr = 2037 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 2038 prop->hints_host_hpage_reserved_va_range.end_addr = 2039 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 2040 2041 if (PAGE_SIZE == SZ_64K) { 2042 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 2043 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 2044 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 2045 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 2046 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 2047 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 2048 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 2049 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 2050 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 2051 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 2052 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2053 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2054 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2055 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2056 prop->pmmu.page_size = PAGE_SIZE_64KB; 2057 2058 /* shifts and masks are the same in PMMU and HPMMU */ 2059 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2060 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2061 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2062 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2063 } else { 2064 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2065 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2066 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2067 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2068 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2069 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2070 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2071 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2072 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2073 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2074 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2075 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2076 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2077 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2078 prop->pmmu.page_size = PAGE_SIZE_4KB; 2079 2080 /* shifts and masks are the same in PMMU and HPMMU */ 2081 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2082 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2083 
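		/*
		 * Illustrative arithmetic only (the rationale is our reading of the
		 * code, not stated by it): the huge page is a whole multiple of the
		 * regular PMMU page - 2MB/4KB = 512 regular pages here, and
		 * 16MB/64KB = 256 in the 64K branch above - so pmmu_huge can reuse
		 * the hop shifts/masks copied from pmmu and differ only in page size
		 * and VA range.
		 */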
prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2084 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2085 } 2086 2087 prop->num_engine_cores = CPU_ID_MAX; 2088 prop->cfg_size = CFG_SIZE; 2089 prop->max_asid = MAX_ASID; 2090 prop->num_of_events = GAUDI2_EVENT_SIZE; 2091 2092 prop->dc_power_default = DC_POWER_DEFAULT; 2093 2094 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2095 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2096 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2097 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2098 2099 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2100 2101 prop->mme_master_slave_mode = 1; 2102 2103 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2104 (num_sync_stream_queues * HL_RSVD_SOBS); 2105 2106 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2107 (num_sync_stream_queues * HL_RSVD_MONS); 2108 2109 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2110 2111 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2112 2113 prop->fw_cpu_boot_dev_sts0_valid = false; 2114 prop->fw_cpu_boot_dev_sts1_valid = false; 2115 prop->hard_reset_done_by_fw = false; 2116 prop->gic_interrupts_enable = true; 2117 2118 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2119 2120 prop->max_dec = NUMBER_OF_DEC; 2121 2122 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2123 2124 prop->dma_mask = 64; 2125 2126 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; 2127 2128 return 0; 2129 } 2130 2131 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2132 { 2133 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2134 bool is_wc[3] = {false, false, true}; 2135 int rc; 2136 2137 rc = hl_pci_bars_map(hdev, name, is_wc); 2138 if (rc) 2139 return rc; 2140 2141 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2142 2143 return 0; 2144 } 2145 2146 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2147 { 2148 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2149 struct hl_inbound_pci_region pci_region; 2150 u64 old_addr = addr; 2151 int rc; 2152 2153 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2154 return old_addr; 2155 2156 if (hdev->asic_prop.iatu_done_by_fw) 2157 return U64_MAX; 2158 2159 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2160 pci_region.mode = PCI_BAR_MATCH_MODE; 2161 pci_region.bar = DRAM_BAR_ID; 2162 pci_region.addr = addr; 2163 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2164 if (rc) 2165 return U64_MAX; 2166 2167 if (gaudi2) { 2168 old_addr = gaudi2->dram_bar_cur_addr; 2169 gaudi2->dram_bar_cur_addr = addr; 2170 } 2171 2172 return old_addr; 2173 } 2174 2175 static int gaudi2_init_iatu(struct hl_device *hdev) 2176 { 2177 struct hl_inbound_pci_region inbound_region; 2178 struct hl_outbound_pci_region outbound_region; 2179 u32 bar_addr_low, bar_addr_high; 2180 int rc; 2181 2182 if (hdev->asic_prop.iatu_done_by_fw) 2183 return 0; 2184 2185 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2186 * We must map this region in BAR match mode in order to 2187 * fetch BAR physical base address 2188 */ 2189 inbound_region.mode = PCI_BAR_MATCH_MODE; 2190 inbound_region.bar = SRAM_CFG_BAR_ID; 2191 /* Base address must be aligned to Bar size which is 256 MB */ 2192 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 2193 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2194 if (rc) 2195 return rc; 2196 2197 /* Fetch physical BAR address */ 2198 
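	/*
	 * Worked example with made-up register values (illustration only): if the
	 * high dword read below returns 0x0000007f and the low dword returns
	 * 0x80000004, masking off the low nibble of BAR flag bits gives
	 * bar_addr_low = 0x80000000 and a physical BAR base of 0x0000007f80000000.
	 */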
bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2199 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2200 2201 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2202 2203 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2204 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2205 inbound_region.bar = SRAM_CFG_BAR_ID; 2206 inbound_region.offset_in_bar = 0; 2207 inbound_region.addr = STM_FLASH_BASE_ADDR; 2208 inbound_region.size = CFG_REGION_SIZE; 2209 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2210 if (rc) 2211 return rc; 2212 2213 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2214 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2215 inbound_region.bar = SRAM_CFG_BAR_ID; 2216 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2217 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2218 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2219 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2220 if (rc) 2221 return rc; 2222 2223 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2224 inbound_region.mode = PCI_BAR_MATCH_MODE; 2225 inbound_region.bar = DRAM_BAR_ID; 2226 inbound_region.addr = DRAM_PHYS_BASE; 2227 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2228 if (rc) 2229 return rc; 2230 2231 /* Outbound Region 0 - Point to Host */ 2232 outbound_region.addr = HOST_PHYS_BASE_0; 2233 outbound_region.size = HOST_PHYS_SIZE_0; 2234 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2235 2236 return rc; 2237 } 2238 2239 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2240 { 2241 return RREG32(mmHW_STATE); 2242 } 2243 2244 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2245 { 2246 struct asic_fixed_properties *prop = &hdev->asic_prop; 2247 2248 /* 2249 * check for error condition in which number of binning candidates 2250 * is higher than the maximum supported by the driver 2251 */ 2252 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2253 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2254 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2255 hdev->tpc_binning); 2256 return -EINVAL; 2257 } 2258 2259 prop->tpc_binning_mask = hdev->tpc_binning; 2260 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2261 2262 return 0; 2263 } 2264 2265 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2266 { 2267 struct asic_fixed_properties *prop = &hdev->asic_prop; 2268 struct hw_queue_properties *q_props = prop->hw_queues_props; 2269 u64 tpc_binning_mask; 2270 u8 subst_idx = 0; 2271 int i, rc; 2272 2273 rc = gaudi2_tpc_binning_init_prop(hdev); 2274 if (rc) 2275 return rc; 2276 2277 tpc_binning_mask = prop->tpc_binning_mask; 2278 2279 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2280 u8 subst_seq, binned, qid_base; 2281 2282 if (tpc_binning_mask == 0) 2283 break; 2284 2285 if (subst_idx == 0) { 2286 subst_seq = TPC_ID_DCORE0_TPC6; 2287 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2288 } else { 2289 subst_seq = TPC_ID_DCORE3_TPC5; 2290 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2291 } 2292 2293 2294 /* clear bit from mask */ 2295 binned = __ffs(tpc_binning_mask); 2296 /* 2297 * Coverity complains about possible out-of-bound access in 2298 * clear_bit 2299 */ 2300 if (binned >= TPC_ID_SIZE) { 2301 dev_err(hdev->dev, 2302 "Invalid binned TPC (binning mask: %llx)\n", 2303 tpc_binning_mask); 2304 return -EINVAL; 2305 } 2306 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2307 2308 /* also clear 
replacing TPC bit from enabled mask */ 2309 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask); 2310 2311 /* bin substite TPC's Qs */ 2312 q_props[qid_base].binned = 1; 2313 q_props[qid_base + 1].binned = 1; 2314 q_props[qid_base + 2].binned = 1; 2315 q_props[qid_base + 3].binned = 1; 2316 2317 subst_idx++; 2318 } 2319 2320 return 0; 2321 } 2322 2323 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev) 2324 { 2325 struct asic_fixed_properties *prop = &hdev->asic_prop; 2326 u8 num_faulty; 2327 2328 num_faulty = hweight32(hdev->decoder_binning); 2329 2330 /* 2331 * check for error condition in which number of binning candidates 2332 * is higher than the maximum supported by the driver 2333 */ 2334 if (num_faulty > MAX_FAULTY_DECODERS) { 2335 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n", 2336 hdev->decoder_binning); 2337 return -EINVAL; 2338 } 2339 2340 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK); 2341 2342 if (prop->decoder_binning_mask) 2343 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1)); 2344 else 2345 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK; 2346 2347 return 0; 2348 } 2349 2350 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev) 2351 { 2352 struct asic_fixed_properties *prop = &hdev->asic_prop; 2353 2354 /* check if we should override default binning */ 2355 if (!hdev->dram_binning) { 2356 prop->dram_binning_mask = 0; 2357 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK; 2358 return; 2359 } 2360 2361 /* set DRAM binning constraints */ 2362 prop->faulty_dram_cluster_map |= hdev->dram_binning; 2363 prop->dram_binning_mask = hdev->dram_binning; 2364 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5); 2365 } 2366 2367 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev) 2368 { 2369 struct asic_fixed_properties *prop = &hdev->asic_prop; 2370 struct hw_queue_properties *q_props; 2371 u8 seq, num_faulty; 2372 2373 num_faulty = hweight32(hdev->edma_binning); 2374 2375 /* 2376 * check for error condition in which number of binning candidates 2377 * is higher than the maximum supported by the driver 2378 */ 2379 if (num_faulty > MAX_FAULTY_EDMAS) { 2380 dev_err(hdev->dev, 2381 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n", 2382 hdev->edma_binning); 2383 return -EINVAL; 2384 } 2385 2386 if (!hdev->edma_binning) { 2387 prop->edma_binning_mask = 0; 2388 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK; 2389 return 0; 2390 } 2391 2392 seq = __ffs((unsigned long)hdev->edma_binning); 2393 2394 /* set binning constraints */ 2395 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]); 2396 prop->edma_binning_mask = hdev->edma_binning; 2397 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1); 2398 2399 /* bin substitute EDMA's queue */ 2400 q_props = prop->hw_queues_props; 2401 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1; 2402 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1; 2403 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1; 2404 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1; 2405 2406 return 0; 2407 } 2408 2409 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask) 2410 { 2411 struct asic_fixed_properties *prop = &hdev->asic_prop; 2412 u8 num_faulty, seq; 2413 2414 /* check if we should override default binning */ 2415 if (!xbar_edge_iso_mask) { 2416 
		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
		return 0;
	}

	/*
	 * Note that this mask can be set to a value other than 0 only after the cpucp
	 * packet (i.e. only the FW can set a redundancy value). For the user it will
	 * always be 0.
	 */
	num_faulty = hweight32(xbar_edge_iso_mask);

	/*
	 * check for error condition in which number of binning candidates
	 * is higher than the maximum supported by the driver
	 */
	if (num_faulty > MAX_FAULTY_XBARS) {
		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
							MAX_FAULTY_XBARS);
		return -EINVAL;
	}

	seq = __ffs((unsigned long)xbar_edge_iso_mask);

	/* set binning constraints */
	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;

	return 0;
}

static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
{
	int rc;

	/*
	 * Mark all clusters as good; each component will "fail" a cluster
	 * based on eFuse/user values.
	 * If more than a single cluster is faulty, the chip is unusable.
	 */
	hdev->asic_prop.faulty_dram_cluster_map = 0;

	gaudi2_set_dram_binning_masks(hdev);

	rc = gaudi2_set_edma_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
	if (rc)
		return rc;

	/* always initially set to full mask */
	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;

	return 0;
}

static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
	if (rc)
		return rc;

	/* if we have DRAM binning reported by FW we should perform cluster config */
	if (prop->faulty_dram_cluster_map) {
		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);

		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
	}

	return 0;
}

static int gaudi2_set_binning_masks(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_set_cluster_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_tpc_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_dec_binning_masks(hdev);
	if (rc)
		return rc;

	return 0;
}

static int gaudi2_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	long max_power;
	u64 dram_size;
	int rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	/* There is no point in asking for this information again when not doing a hard
	 * reset, as the device CPU hasn't been reset
	 */
	if (hdev->reset_info.in_compute_reset)
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		/* We can have either 5 or 6 HBMs.
other values are invalid */ 2536 2537 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2538 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2539 dev_err(hdev->dev, 2540 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n", 2541 dram_size, prop->dram_size); 2542 dram_size = prop->dram_size; 2543 } 2544 2545 prop->dram_size = dram_size; 2546 prop->dram_end_address = prop->dram_base_address + dram_size; 2547 } 2548 2549 if (!strlen(prop->cpucp_info.card_name)) 2550 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2551 2552 /* Overwrite binning masks with the actual binning values from F/W */ 2553 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2554 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2555 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2556 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2557 2558 /* 2559 * at this point the DRAM parameters need to be updated according to data obtained 2560 * from the FW 2561 */ 2562 rc = hdev->asic_funcs->set_dram_properties(hdev); 2563 if (rc) 2564 return rc; 2565 2566 rc = hdev->asic_funcs->set_binning_masks(hdev); 2567 if (rc) 2568 return rc; 2569 2570 max_power = hl_fw_get_max_power(hdev); 2571 if (max_power < 0) 2572 return max_power; 2573 2574 prop->max_power_default = (u64) max_power; 2575 2576 return 0; 2577 } 2578 2579 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2580 { 2581 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2582 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2583 int rc; 2584 2585 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2586 return 0; 2587 2588 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2589 if (rc) 2590 return rc; 2591 2592 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2593 2594 return 0; 2595 } 2596 2597 static int gaudi2_early_init(struct hl_device *hdev) 2598 { 2599 struct asic_fixed_properties *prop = &hdev->asic_prop; 2600 struct pci_dev *pdev = hdev->pdev; 2601 resource_size_t pci_bar_size; 2602 int rc; 2603 2604 rc = gaudi2_set_fixed_properties(hdev); 2605 if (rc) 2606 return rc; 2607 2608 /* Check BAR sizes */ 2609 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2610 2611 if (pci_bar_size != CFG_BAR_SIZE) { 2612 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2613 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2614 rc = -ENODEV; 2615 goto free_queue_props; 2616 } 2617 2618 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2619 if (pci_bar_size != MSIX_BAR_SIZE) { 2620 dev_err(hdev->dev, "Not " HL_NAME "? 
BAR %d size %pa, expecting %llu\n", 2621 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2622 rc = -ENODEV; 2623 goto free_queue_props; 2624 } 2625 2626 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2627 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2628 2629 /* 2630 * Only in pldm driver config iATU 2631 */ 2632 if (hdev->pldm) 2633 hdev->asic_prop.iatu_done_by_fw = false; 2634 else 2635 hdev->asic_prop.iatu_done_by_fw = true; 2636 2637 rc = hl_pci_init(hdev); 2638 if (rc) 2639 goto free_queue_props; 2640 2641 /* Before continuing in the initialization, we need to read the preboot 2642 * version to determine whether we run with a security-enabled firmware 2643 */ 2644 rc = hl_fw_read_preboot_status(hdev); 2645 if (rc) { 2646 if (hdev->reset_on_preboot_fail) 2647 hdev->asic_funcs->hw_fini(hdev, true, false); 2648 goto pci_fini; 2649 } 2650 2651 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2652 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2653 hdev->asic_funcs->hw_fini(hdev, true, false); 2654 } 2655 2656 return 0; 2657 2658 pci_fini: 2659 hl_pci_fini(hdev); 2660 free_queue_props: 2661 kfree(hdev->asic_prop.hw_queues_props); 2662 return rc; 2663 } 2664 2665 static int gaudi2_early_fini(struct hl_device *hdev) 2666 { 2667 kfree(hdev->asic_prop.hw_queues_props); 2668 hl_pci_fini(hdev); 2669 2670 return 0; 2671 } 2672 2673 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 2674 { 2675 switch (arc_id) { 2676 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 2677 return true; 2678 default: 2679 return false; 2680 } 2681 } 2682 2683 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 2684 { 2685 switch (arc_id) { 2686 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 2687 return true; 2688 default: 2689 return false; 2690 } 2691 } 2692 2693 static void gaudi2_init_arcs(struct hl_device *hdev) 2694 { 2695 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2696 u64 arc_id; 2697 u32 i; 2698 2699 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 2700 if (gaudi2_is_arc_enabled(hdev, i)) 2701 continue; 2702 2703 gaudi2_set_arc_id_cap(hdev, i); 2704 } 2705 2706 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 2707 if (!gaudi2_is_queue_enabled(hdev, i)) 2708 continue; 2709 2710 arc_id = gaudi2_queue_id_to_arc_id[i]; 2711 if (gaudi2_is_arc_enabled(hdev, arc_id)) 2712 continue; 2713 2714 if (gaudi2_is_arc_nic_owned(arc_id) && 2715 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 2716 continue; 2717 2718 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 2719 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 2720 continue; 2721 2722 gaudi2_set_arc_id_cap(hdev, arc_id); 2723 } 2724 } 2725 2726 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 2727 { 2728 u32 reg_base, reg_val; 2729 int rc; 2730 2731 switch (cpu_id) { 2732 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 2733 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 2734 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2735 ARC_DCCM_BLOCK_SIZE * 2, true); 2736 if (rc) 2737 return rc; 2738 break; 2739 case CPU_ID_SCHED_ARC4: 2740 case CPU_ID_SCHED_ARC5: 2741 case CPU_ID_MME_QMAN_ARC0: 2742 case CPU_ID_MME_QMAN_ARC1: 2743 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 2744 2745 /* Scrub lower DCCM block */ 2746 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2747 ARC_DCCM_BLOCK_SIZE, true); 2748 if (rc) 2749 return rc; 2750 2751 /* Switch to upper DCCM block */ 2752 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 2753 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2754 2755 /* Scrub upper DCCM block */ 2756 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2757 ARC_DCCM_BLOCK_SIZE, true); 2758 if (rc) 2759 return rc; 2760 2761 /* Switch to lower DCCM block */ 2762 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 2763 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2764 break; 2765 default: 2766 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2767 ARC_DCCM_BLOCK_SIZE, true); 2768 if (rc) 2769 return rc; 2770 } 2771 2772 return 0; 2773 } 2774 2775 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 2776 { 2777 u16 arc_id; 2778 2779 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 2780 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 2781 continue; 2782 2783 gaudi2_scrub_arc_dccm(hdev, arc_id); 2784 } 2785 } 2786 2787 static int gaudi2_late_init(struct hl_device *hdev) 2788 { 2789 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2790 int rc; 2791 2792 hdev->asic_prop.supports_advanced_cpucp_rc = true; 2793 2794 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 2795 gaudi2->virt_msix_db_dma_addr); 2796 if (rc) { 2797 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 2798 return rc; 2799 } 2800 2801 rc = gaudi2_fetch_psoc_frequency(hdev); 2802 if (rc) { 2803 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 2804 goto disable_pci_access; 2805 } 2806 2807 gaudi2_init_arcs(hdev); 2808 gaudi2_scrub_arcs_dccm(hdev); 2809 gaudi2_init_security(hdev); 2810 2811 return 0; 2812 2813 disable_pci_access: 2814 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 2815 2816 return rc; 2817 } 2818 2819 static void gaudi2_late_fini(struct hl_device *hdev) 2820 { 2821 hl_hwmon_release_resources(hdev); 2822 } 2823 2824 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 2825 { 2826 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2827 2828 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2829 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2830 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2831 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2832 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2833 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2834 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2835 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2836 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2837 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2838 } 2839 2840 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 2841 { 2842 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2843 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2844 u32 block_size, umr_start_idx, num_umr_blocks; 2845 int i; 2846 2847 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 2848 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 2849 block_size = ARC_DCCM_BLOCK_SIZE * 2; 2850 else 2851 block_size = ARC_DCCM_BLOCK_SIZE; 2852 2853 blocks[i].address = gaudi2_arc_dccm_bases[i]; 2854 blocks[i].size = block_size; 2855 } 2856 2857 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 2858 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 2859 2860 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 2861 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 2862 2863 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 2864 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 2865 2866 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 2867 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 2868 2869 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 2870 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 2871 2872 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 2873 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 2874 2875 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 2876 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 2877 2878 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 2879 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 2880 2881 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 2882 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 2883 for (i = 0 ; i < num_umr_blocks ; i++) { 2884 u8 nic_id, umr_block_id; 2885 2886 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 2887 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 2888 2889 blocks[umr_start_idx + i].address = 2890 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 2891 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 2892 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 2893 umr_block_id * NIC_UMR_OFFSET; 2894 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 2895 } 2896 2897 /* Expose decoder HW configuration block to user */ 2898 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 2899 2900 for (i = 1; i < NUM_OF_DCORES; ++i) { 2901 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 2902 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 2903 2904 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 2905 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 2906 2907 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 2908 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 2909 } 2910 } 2911 2912 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 2913 { 2914 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 2915 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 2916 int i, j, rc = 0; 2917 2918 /* The device ARC works with 32-bits addresses, and because there is a single HW register 2919 * that holds the extension bits (49..28), these bits must be identical in all the allocated 2920 * range. 
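 *
 * Concretely, this is what the retry loop below checks: assuming
 * GAUDI2_ARC_PCI_MSB_ADDR() extracts bits 49..28 of a DMA address, an
 * allocation is usable only if
 *
 *	GAUDI2_ARC_PCI_MSB_ADDR(dma_addr) ==
 *		GAUDI2_ARC_PCI_MSB_ADDR(dma_addr + HL_CPU_ACCESSIBLE_MEM_SIZE - 1)
 *
 * which, for a buffer smaller than 256MB, simply means it must not cross
 * a 2^28 (256MB) boundary. Unusable attempts are kept aside so the
 * allocator hands out a different range on the next try, and all of them
 * are freed once a good range is found or the retry count is exhausted.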
2921 */ 2922 2923 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 2924 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 2925 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO); 2926 if (!virt_addr_arr[i]) { 2927 rc = -ENOMEM; 2928 goto free_dma_mem_arr; 2929 } 2930 2931 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 2932 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr)) 2933 break; 2934 } 2935 2936 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) { 2937 dev_err(hdev->dev, 2938 "MSB of ARC accessible DMA memory are not identical in all range\n"); 2939 rc = -EFAULT; 2940 goto free_dma_mem_arr; 2941 } 2942 2943 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 2944 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 2945 2946 free_dma_mem_arr: 2947 for (j = 0 ; j < i ; j++) 2948 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 2949 dma_addr_arr[j]); 2950 2951 return rc; 2952 } 2953 2954 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev) 2955 { 2956 struct asic_fixed_properties *prop = &hdev->asic_prop; 2957 struct pci_mem_region *region; 2958 2959 /* CFG */ 2960 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 2961 region->region_base = CFG_BASE; 2962 region->region_size = CFG_SIZE; 2963 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR; 2964 region->bar_size = CFG_BAR_SIZE; 2965 region->bar_id = SRAM_CFG_BAR_ID; 2966 region->used = 1; 2967 2968 /* SRAM */ 2969 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 2970 region->region_base = SRAM_BASE_ADDR; 2971 region->region_size = SRAM_SIZE; 2972 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE; 2973 region->bar_size = CFG_BAR_SIZE; 2974 region->bar_id = SRAM_CFG_BAR_ID; 2975 region->used = 1; 2976 2977 /* DRAM */ 2978 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 2979 region->region_base = DRAM_PHYS_BASE; 2980 region->region_size = hdev->asic_prop.dram_size; 2981 region->offset_in_bar = 0; 2982 region->bar_size = prop->dram_pci_bar_size; 2983 region->bar_id = DRAM_BAR_ID; 2984 region->used = 1; 2985 } 2986 2987 static void gaudi2_user_interrupt_setup(struct hl_device *hdev) 2988 { 2989 struct asic_fixed_properties *prop = &hdev->asic_prop; 2990 int i, j, k; 2991 2992 /* Initialize common user CQ interrupt */ 2993 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, 2994 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ); 2995 2996 /* Initialize common decoder interrupt */ 2997 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, 2998 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER); 2999 3000 /* User interrupts structure holds both decoder and user interrupts from various engines. 3001 * We first initialize the decoder interrupts and then we add the user interrupts. 3002 * The only limitation is that the last decoder interrupt id must be smaller 3003 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time. 
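 *
 * A sketch of what such a build-time check amounts to (the actual
 * assertion lives elsewhere and may be written differently):
 *
 *	BUILD_BUG_ON(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM >= GAUDI2_IRQ_NUM_USER_FIRST);
 *
 * With that guaranteed, user_interrupt[] can be filled contiguously:
 * first one entry per decoder normal interrupt, then one entry per user
 * CQ interrupt, as done by the two loops below.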
3004 */ 3005 3006 /* Initialize decoder interrupts, expose only normal interrupts, 3007 * error interrupts to be handled by driver 3008 */ 3009 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 3010 i += 2, j++) 3011 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, 3012 HL_USR_INTERRUPT_DECODER); 3013 3014 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 3015 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ); 3016 } 3017 3018 static inline int gaudi2_get_non_zero_random_int(void) 3019 { 3020 int rand = get_random_u32(); 3021 3022 return rand ? rand : 1; 3023 } 3024 3025 static void gaudi2_special_blocks_free(struct hl_device *hdev) 3026 { 3027 struct asic_fixed_properties *prop = &hdev->asic_prop; 3028 struct hl_skip_blocks_cfg *skip_special_blocks_cfg = 3029 &prop->skip_special_blocks_cfg; 3030 3031 kfree(prop->special_blocks); 3032 kfree(skip_special_blocks_cfg->block_types); 3033 kfree(skip_special_blocks_cfg->block_ranges); 3034 } 3035 3036 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev) 3037 { 3038 gaudi2_special_blocks_free(hdev); 3039 } 3040 3041 static bool gaudi2_special_block_skip(struct hl_device *hdev, 3042 struct hl_special_blocks_cfg *special_blocks_cfg, 3043 u32 blk_idx, u32 major, u32 minor, u32 sub_minor) 3044 { 3045 return false; 3046 } 3047 3048 static int gaudi2_special_blocks_config(struct hl_device *hdev) 3049 { 3050 struct asic_fixed_properties *prop = &hdev->asic_prop; 3051 int i, rc; 3052 3053 /* Configure Special blocks */ 3054 prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; 3055 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); 3056 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, 3057 sizeof(*prop->special_blocks), GFP_KERNEL); 3058 if (!prop->special_blocks) 3059 return -ENOMEM; 3060 3061 for (i = 0 ; i < prop->num_of_special_blocks ; i++) 3062 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i], 3063 sizeof(*prop->special_blocks)); 3064 3065 /* Configure when to skip Special blocks */ 3066 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg)); 3067 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip; 3068 3069 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) { 3070 prop->skip_special_blocks_cfg.block_types = 3071 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types), 3072 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL); 3073 if (!prop->skip_special_blocks_cfg.block_types) { 3074 rc = -ENOMEM; 3075 goto free_special_blocks; 3076 } 3077 3078 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types, 3079 sizeof(gaudi2_iterator_skip_block_types)); 3080 3081 prop->skip_special_blocks_cfg.block_types_len = 3082 ARRAY_SIZE(gaudi2_iterator_skip_block_types); 3083 } 3084 3085 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) { 3086 prop->skip_special_blocks_cfg.block_ranges = 3087 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges), 3088 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL); 3089 if (!prop->skip_special_blocks_cfg.block_ranges) { 3090 rc = -ENOMEM; 3091 goto free_skip_special_blocks_types; 3092 } 3093 3094 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++) 3095 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i], 3096 &gaudi2_iterator_skip_block_ranges[i], 3097 sizeof(struct range)); 3098 3099 prop->skip_special_blocks_cfg.block_ranges_len = 3100 
ARRAY_SIZE(gaudi2_iterator_skip_block_ranges); 3101 } 3102 3103 return 0; 3104 3105 free_skip_special_blocks_types: 3106 kfree(prop->skip_special_blocks_cfg.block_types); 3107 free_special_blocks: 3108 kfree(prop->special_blocks); 3109 3110 return rc; 3111 } 3112 3113 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev) 3114 { 3115 return gaudi2_special_blocks_config(hdev); 3116 } 3117 3118 static int gaudi2_sw_init(struct hl_device *hdev) 3119 { 3120 struct asic_fixed_properties *prop = &hdev->asic_prop; 3121 struct gaudi2_device *gaudi2; 3122 int i, rc; 3123 3124 /* Allocate device structure */ 3125 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 3126 if (!gaudi2) 3127 return -ENOMEM; 3128 3129 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 3130 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 3131 continue; 3132 3133 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 3134 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 3135 GAUDI2_EVENT_SIZE); 3136 rc = -EINVAL; 3137 goto free_gaudi2_device; 3138 } 3139 3140 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 3141 } 3142 3143 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 3144 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 3145 3146 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 3147 3148 hdev->asic_specific = gaudi2; 3149 3150 /* Create DMA pool for small allocations. 3151 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 3152 * PI/CI registers allocated from this pool have this restriction 3153 */ 3154 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 3155 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 3156 if (!hdev->dma_pool) { 3157 dev_err(hdev->dev, "failed to create DMA pool\n"); 3158 rc = -ENOMEM; 3159 goto free_gaudi2_device; 3160 } 3161 3162 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3163 if (rc) 3164 goto free_dma_pool; 3165 3166 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3167 if (!hdev->cpu_accessible_dma_pool) { 3168 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3169 rc = -ENOMEM; 3170 goto free_cpu_dma_mem; 3171 } 3172 3173 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3174 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3175 if (rc) { 3176 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3177 rc = -EFAULT; 3178 goto free_cpu_accessible_dma_pool; 3179 } 3180 3181 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3182 &gaudi2->virt_msix_db_dma_addr); 3183 if (!gaudi2->virt_msix_db_cpu_addr) { 3184 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3185 rc = -ENOMEM; 3186 goto free_cpu_accessible_dma_pool; 3187 } 3188 3189 spin_lock_init(&gaudi2->hw_queues_lock); 3190 3191 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3192 &gaudi2->scratchpad_bus_address, 3193 GFP_KERNEL | __GFP_ZERO); 3194 if (!gaudi2->scratchpad_kernel_address) { 3195 rc = -ENOMEM; 3196 goto free_virt_msix_db_mem; 3197 } 3198 3199 gaudi2_user_mapped_blocks_init(hdev); 3200 3201 /* Initialize user interrupts */ 3202 gaudi2_user_interrupt_setup(hdev); 3203 3204 hdev->supports_coresight = true; 3205 hdev->supports_sync_stream = true; 3206 hdev->supports_cb_mapping = true; 3207 hdev->supports_wait_for_multi_cs = false; 3208 3209 prop->supports_compute_reset = true; 3210 3211 
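	/*
	 * Note on the CPU-accessible memory set up above: the coherent buffer
	 * is handed to a genalloc pool with a 32-byte minimum allocation
	 * order, and small host/ARC-visible objects (e.g. the virtual MSI-X
	 * doorbell page) are carved out of it via the
	 * hl_cpu_accessible_dma_pool_alloc()/free() wrappers. A minimal sketch
	 * of that pattern, with illustrative sizes rather than the driver's
	 * actual call sites:
	 *
	 *	pool = gen_pool_create(ilog2(32), -1);
	 *	rc = gen_pool_add(pool, (uintptr_t)cpu_va, mem_size, -1);
	 *	blk = gen_pool_alloc(pool, SZ_4K);
	 *	...
	 *	gen_pool_free(pool, blk, SZ_4K);
	 *	gen_pool_destroy(pool);
	 */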
hdev->asic_funcs->set_pci_memory_regions(hdev); 3212 3213 rc = gaudi2_special_blocks_iterator_config(hdev); 3214 if (rc) 3215 goto free_scratchpad_mem; 3216 3217 return 0; 3218 3219 free_scratchpad_mem: 3220 hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address, 3221 gaudi2->scratchpad_bus_address); 3222 free_virt_msix_db_mem: 3223 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3224 free_cpu_accessible_dma_pool: 3225 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3226 free_cpu_dma_mem: 3227 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3228 hdev->cpu_accessible_dma_address); 3229 free_dma_pool: 3230 dma_pool_destroy(hdev->dma_pool); 3231 free_gaudi2_device: 3232 kfree(gaudi2); 3233 return rc; 3234 } 3235 3236 static int gaudi2_sw_fini(struct hl_device *hdev) 3237 { 3238 struct asic_fixed_properties *prop = &hdev->asic_prop; 3239 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3240 3241 gaudi2_special_blocks_iterator_free(hdev); 3242 3243 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3244 3245 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3246 3247 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3248 hdev->cpu_accessible_dma_address); 3249 3250 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3251 gaudi2->scratchpad_bus_address); 3252 3253 dma_pool_destroy(hdev->dma_pool); 3254 3255 kfree(gaudi2); 3256 3257 return 0; 3258 } 3259 3260 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base) 3261 { 3262 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP | 3263 QM_GLBL_CFG1_CQF_STOP | 3264 QM_GLBL_CFG1_CP_STOP); 3265 3266 /* stop also the ARC */ 3267 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP); 3268 } 3269 3270 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base) 3271 { 3272 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH | 3273 QM_GLBL_CFG1_CQF_FLUSH | 3274 QM_GLBL_CFG1_CP_FLUSH); 3275 } 3276 3277 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base) 3278 { 3279 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH); 3280 } 3281 3282 /** 3283 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters 3284 * 3285 * @hdev: pointer to the habanalabs device structure 3286 * @queue_id: queue to clear fence counters to 3287 * @skip_fence: if true set maximum fence value to all fence counters to avoid 3288 * getting stuck on any fence value. otherwise set all fence 3289 * counters to 0 (standard clear of fence counters) 3290 */ 3291 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id, 3292 bool skip_fence) 3293 { 3294 u32 size, reg_base; 3295 u32 addr, val; 3296 3297 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3298 3299 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET; 3300 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0; 3301 3302 /* 3303 * in case we want to make sure that QM that is stuck on a fence will 3304 * be released we should set the fence counter to a higher value that 3305 * the value the QM waiting for. to comply with any fence counter of 3306 * any value we set maximum fence value to all counters 3307 */ 3308 val = skip_fence ? 
U32_MAX : 0; 3309 gaudi2_memset_device_lbw(hdev, addr, size, val); 3310 } 3311 3312 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3313 { 3314 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3315 3316 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3317 gaudi2_flush_qman_common(hdev, reg_base); 3318 gaudi2_flush_qman_arc_common(hdev, reg_base); 3319 } 3320 3321 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3322 { 3323 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3324 int dcore, inst; 3325 3326 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3327 goto stop_edma_qmans; 3328 3329 /* Stop CPs of PDMA QMANs */ 3330 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3331 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3332 3333 stop_edma_qmans: 3334 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3335 return; 3336 3337 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3338 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3339 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3340 u32 qm_base; 3341 3342 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3343 continue; 3344 3345 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3346 inst * DCORE_EDMA_OFFSET; 3347 3348 /* Stop CPs of EDMA QMANs */ 3349 gaudi2_stop_qman_common(hdev, qm_base); 3350 } 3351 } 3352 } 3353 3354 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3355 { 3356 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3357 u32 offset, i; 3358 3359 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3360 3361 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3362 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3363 continue; 3364 3365 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3366 } 3367 } 3368 3369 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3370 { 3371 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3372 u32 reg_base; 3373 int i; 3374 3375 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3376 return; 3377 3378 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3379 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3380 continue; 3381 3382 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3383 gaudi2_stop_qman_common(hdev, reg_base); 3384 } 3385 } 3386 3387 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3388 { 3389 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3390 u32 reg_base; 3391 int i; 3392 3393 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3394 return; 3395 3396 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3397 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3398 continue; 3399 3400 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3401 gaudi2_stop_qman_common(hdev, reg_base); 3402 } 3403 } 3404 3405 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3406 { 3407 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3408 u32 reg_base, queue_id; 3409 int i; 3410 3411 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3412 return; 3413 3414 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3415 3416 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3417 if (!(hdev->nic_ports_mask & BIT(i))) 3418 continue; 3419 3420 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3421 gaudi2_stop_qman_common(hdev, reg_base); 3422 } 3423 } 3424 3425 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3426 { 3427 u32 reg_val; 3428 3429 reg_val = 
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3430 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3431 } 3432 3433 static void gaudi2_dma_stall(struct hl_device *hdev) 3434 { 3435 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3436 int dcore, inst; 3437 3438 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3439 goto stall_edma; 3440 3441 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3442 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3443 3444 stall_edma: 3445 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3446 return; 3447 3448 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3449 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3450 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3451 u32 core_base; 3452 3453 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3454 continue; 3455 3456 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3457 inst * DCORE_EDMA_OFFSET; 3458 3459 /* Stall CPs of EDMA QMANs */ 3460 gaudi2_stall_dma_common(hdev, core_base); 3461 } 3462 } 3463 } 3464 3465 static void gaudi2_mme_stall(struct hl_device *hdev) 3466 { 3467 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3468 u32 offset, i; 3469 3470 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3471 3472 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3473 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3474 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3475 } 3476 3477 static void gaudi2_tpc_stall(struct hl_device *hdev) 3478 { 3479 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3480 u32 reg_base; 3481 int i; 3482 3483 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3484 return; 3485 3486 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3487 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3488 continue; 3489 3490 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3491 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3492 } 3493 } 3494 3495 static void gaudi2_rotator_stall(struct hl_device *hdev) 3496 { 3497 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3498 u32 reg_val; 3499 int i; 3500 3501 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3502 return; 3503 3504 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3505 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3506 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3507 3508 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3509 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3510 continue; 3511 3512 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3513 } 3514 } 3515 3516 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3517 { 3518 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3519 } 3520 3521 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3522 { 3523 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3524 int dcore, inst; 3525 3526 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3527 goto stop_edma_qmans; 3528 3529 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3530 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3531 3532 stop_edma_qmans: 3533 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3534 return; 3535 3536 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3537 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3538 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3539 u32 qm_base; 3540 3541 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3542 continue; 3543 3544 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3545 inst * 
DCORE_EDMA_OFFSET; 3546 3547 /* Disable CPs of EDMA QMANs */ 3548 gaudi2_disable_qman_common(hdev, qm_base); 3549 } 3550 } 3551 } 3552 3553 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3554 { 3555 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3556 u32 offset, i; 3557 3558 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3559 3560 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3561 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3562 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3563 } 3564 3565 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3566 { 3567 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3568 u32 reg_base; 3569 int i; 3570 3571 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3572 return; 3573 3574 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3575 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3576 continue; 3577 3578 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3579 gaudi2_disable_qman_common(hdev, reg_base); 3580 } 3581 } 3582 3583 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 3584 { 3585 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3586 u32 reg_base; 3587 int i; 3588 3589 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3590 return; 3591 3592 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3593 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3594 continue; 3595 3596 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3597 gaudi2_disable_qman_common(hdev, reg_base); 3598 } 3599 } 3600 3601 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 3602 { 3603 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3604 u32 reg_base, queue_id; 3605 int i; 3606 3607 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3608 return; 3609 3610 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3611 3612 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3613 if (!(hdev->nic_ports_mask & BIT(i))) 3614 continue; 3615 3616 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3617 gaudi2_disable_qman_common(hdev, reg_base); 3618 } 3619 } 3620 3621 static void gaudi2_enable_timestamp(struct hl_device *hdev) 3622 { 3623 /* Disable the timestamp counter */ 3624 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3625 3626 /* Zero the lower/upper parts of the 64-bit counter */ 3627 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 3628 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 3629 3630 /* Enable the counter */ 3631 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 3632 } 3633 3634 static void gaudi2_disable_timestamp(struct hl_device *hdev) 3635 { 3636 /* Disable the timestamp counter */ 3637 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3638 } 3639 3640 static const char *gaudi2_irq_name(u16 irq_number) 3641 { 3642 switch (irq_number) { 3643 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 3644 return "gaudi2 cpu eq"; 3645 case GAUDI2_IRQ_NUM_COMPLETION: 3646 return "gaudi2 completion"; 3647 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 3648 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 3649 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 3650 return "gaudi2 user completion"; 3651 default: 3652 return "invalid"; 3653 } 3654 } 3655 3656 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 3657 { 3658 int i, irq, relative_idx; 3659 struct hl_dec *dec; 3660 3661 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 3662 irq = pci_irq_vector(hdev->pdev, i); 3663 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3664 3665 dec = hdev->dec + relative_idx / 2; 3666 3667 /* We pass different structures depending on the irq handler. For the abnormal 3668 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3669 * user_interrupt entry 3670 */ 3671 free_irq(irq, ((relative_idx % 2) ? 3672 (void *) dec : 3673 (void *) &hdev->user_interrupt[dec->core_id])); 3674 } 3675 } 3676 3677 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 3678 { 3679 int rc, i, irq_init_cnt, irq, relative_idx; 3680 irq_handler_t irq_handler; 3681 struct hl_dec *dec; 3682 3683 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 3684 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 3685 i++, irq_init_cnt++) { 3686 3687 irq = pci_irq_vector(hdev->pdev, i); 3688 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3689 3690 irq_handler = (relative_idx % 2) ? 3691 hl_irq_handler_dec_abnrm : 3692 hl_irq_handler_user_interrupt; 3693 3694 dec = hdev->dec + relative_idx / 2; 3695 3696 /* We pass different structures depending on the irq handler. For the abnormal 3697 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3698 * user_interrupt entry 3699 */ 3700 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), 3701 ((relative_idx % 2) ? 3702 (void *) dec : 3703 (void *) &hdev->user_interrupt[dec->core_id])); 3704 if (rc) { 3705 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3706 goto free_dec_irqs; 3707 } 3708 } 3709 3710 return 0; 3711 3712 free_dec_irqs: 3713 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 3714 return rc; 3715 } 3716 3717 static int gaudi2_enable_msix(struct hl_device *hdev) 3718 { 3719 struct asic_fixed_properties *prop = &hdev->asic_prop; 3720 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3721 int rc, irq, i, j, user_irq_init_cnt; 3722 irq_handler_t irq_handler; 3723 struct hl_cq *cq; 3724 3725 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 3726 return 0; 3727 3728 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 3729 PCI_IRQ_MSIX); 3730 if (rc < 0) { 3731 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 3732 GAUDI2_MSIX_ENTRIES, rc); 3733 return rc; 3734 } 3735 3736 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3737 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3738 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 3739 if (rc) { 3740 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3741 goto free_irq_vectors; 3742 } 3743 3744 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3745 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 3746 &hdev->event_queue); 3747 if (rc) { 3748 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3749 goto free_completion_irq; 3750 } 3751 3752 rc = gaudi2_dec_enable_msix(hdev); 3753 if (rc) { 3754 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 3755 goto free_event_irq; 3756 } 3757 3758 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 
3759 user_irq_init_cnt < prop->user_interrupt_count; 3760 i++, j++, user_irq_init_cnt++) { 3761 3762 irq = pci_irq_vector(hdev->pdev, i); 3763 irq_handler = hl_irq_handler_user_interrupt; 3764 3765 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); 3766 if (rc) { 3767 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3768 goto free_user_irq; 3769 } 3770 } 3771 3772 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 3773 3774 return 0; 3775 3776 free_user_irq: 3777 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 3778 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 3779 3780 irq = pci_irq_vector(hdev->pdev, i); 3781 free_irq(irq, &hdev->user_interrupt[j]); 3782 } 3783 3784 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3785 3786 free_event_irq: 3787 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3788 free_irq(irq, cq); 3789 3790 free_completion_irq: 3791 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3792 free_irq(irq, cq); 3793 3794 free_irq_vectors: 3795 pci_free_irq_vectors(hdev->pdev); 3796 3797 return rc; 3798 } 3799 3800 static void gaudi2_sync_irqs(struct hl_device *hdev) 3801 { 3802 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3803 int i, j; 3804 int irq; 3805 3806 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3807 return; 3808 3809 /* Wait for all pending IRQs to be finished */ 3810 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 3811 3812 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 3813 irq = pci_irq_vector(hdev->pdev, i); 3814 synchronize_irq(irq); 3815 } 3816 3817 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 3818 i++, j++) { 3819 irq = pci_irq_vector(hdev->pdev, i); 3820 synchronize_irq(irq); 3821 } 3822 3823 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 3824 } 3825 3826 static void gaudi2_disable_msix(struct hl_device *hdev) 3827 { 3828 struct asic_fixed_properties *prop = &hdev->asic_prop; 3829 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3830 struct hl_cq *cq; 3831 int irq, i, j, k; 3832 3833 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3834 return; 3835 3836 gaudi2_sync_irqs(hdev); 3837 3838 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3839 free_irq(irq, &hdev->event_queue); 3840 3841 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3842 3843 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 3844 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 3845 3846 irq = pci_irq_vector(hdev->pdev, i); 3847 free_irq(irq, &hdev->user_interrupt[j]); 3848 } 3849 3850 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3851 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3852 free_irq(irq, cq); 3853 3854 pci_free_irq_vectors(hdev->pdev); 3855 3856 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 3857 } 3858 3859 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 3860 { 3861 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3862 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3863 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3864 int rc; 3865 3866 if (hdev->pldm) 3867 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3868 else 3869 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3870 3871 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3872 dec_bit = dcore_id 
* NUM_OF_DEC_PER_DCORE + dec_id; 3873 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3874 continue; 3875 3876 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; 3877 3878 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0); 3879 3880 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3881 3882 /* Wait till all traffic from decoder stops 3883 * before apply core reset. 3884 */ 3885 rc = hl_poll_timeout( 3886 hdev, 3887 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3888 graceful, 3889 (graceful & graceful_pend_mask), 3890 100, 3891 timeout_usec); 3892 if (rc) 3893 dev_err(hdev->dev, 3894 "Failed to stop traffic from DCORE%d Decoder %d\n", 3895 dcore_id, dec_id); 3896 } 3897 } 3898 3899 static void gaudi2_stop_pcie_dec(struct hl_device *hdev) 3900 { 3901 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3902 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3903 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3904 int rc; 3905 3906 if (hdev->pldm) 3907 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3908 else 3909 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3910 3911 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3912 dec_bit = PCIE_DEC_SHIFT + dec_id; 3913 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3914 continue; 3915 3916 offset = dec_id * PCIE_VDEC_OFFSET; 3917 3918 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0); 3919 3920 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3921 3922 /* Wait till all traffic from decoder stops 3923 * before apply core reset. 3924 */ 3925 rc = hl_poll_timeout( 3926 hdev, 3927 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3928 graceful, 3929 (graceful & graceful_pend_mask), 3930 100, 3931 timeout_usec); 3932 if (rc) 3933 dev_err(hdev->dev, 3934 "Failed to stop traffic from PCIe Decoder %d\n", 3935 dec_id); 3936 } 3937 } 3938 3939 static void gaudi2_stop_dec(struct hl_device *hdev) 3940 { 3941 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3942 int dcore_id; 3943 3944 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0) 3945 return; 3946 3947 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 3948 gaudi2_stop_dcore_dec(hdev, dcore_id); 3949 3950 gaudi2_stop_pcie_dec(hdev); 3951 } 3952 3953 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3954 { 3955 u32 reg_base, reg_val; 3956 3957 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3958 if (run_mode == HL_ENGINE_CORE_RUN) 3959 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 3960 else 3961 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 3962 3963 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 3964 } 3965 3966 static void gaudi2_halt_arcs(struct hl_device *hdev) 3967 { 3968 u16 arc_id; 3969 3970 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 3971 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3972 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 3973 } 3974 } 3975 3976 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3977 { 3978 int rc; 3979 u32 reg_base, val, ack_mask, timeout_usec = 100000; 3980 3981 if (hdev->pldm) 3982 timeout_usec *= 100; 3983 3984 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3985 if (run_mode == HL_ENGINE_CORE_RUN) 3986 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 3987 else 3988 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 3989 3990 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 3991 val, 
((val & ack_mask) == ack_mask), 3992 1000, timeout_usec); 3993 3994 if (!rc) { 3995 /* Clear */ 3996 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 3997 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 3998 } 3999 4000 return rc; 4001 } 4002 4003 static void gaudi2_reset_arcs(struct hl_device *hdev) 4004 { 4005 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4006 u16 arc_id; 4007 4008 if (!gaudi2) 4009 return; 4010 4011 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) 4012 if (gaudi2_is_arc_enabled(hdev, arc_id)) 4013 gaudi2_clr_arc_id_cap(hdev, arc_id); 4014 } 4015 4016 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) 4017 { 4018 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4019 u32 queue_id; 4020 int i; 4021 4022 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 4023 return; 4024 4025 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 4026 4027 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4028 if (!(hdev->nic_ports_mask & BIT(i))) 4029 continue; 4030 4031 gaudi2_qman_manual_flush_common(hdev, queue_id); 4032 } 4033 } 4034 4035 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 4036 u32 num_cores, u32 core_command) 4037 { 4038 int i, rc; 4039 4040 4041 for (i = 0 ; i < num_cores ; i++) { 4042 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 4043 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 4044 } 4045 4046 for (i = 0 ; i < num_cores ; i++) { 4047 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 4048 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); 4049 4050 if (rc) { 4051 dev_err(hdev->dev, "failed to %s arc: %d\n", 4052 (core_command == HL_ENGINE_CORE_HALT) ? 4053 "HALT" : "RUN", core_ids[i]); 4054 return -1; 4055 } 4056 } 4057 } 4058 4059 return 0; 4060 } 4061 4062 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4063 { 4064 u32 wait_timeout_ms; 4065 4066 if (hdev->pldm) 4067 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC; 4068 else 4069 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; 4070 4071 if (fw_reset) 4072 goto skip_engines; 4073 4074 gaudi2_stop_dma_qmans(hdev); 4075 gaudi2_stop_mme_qmans(hdev); 4076 gaudi2_stop_tpc_qmans(hdev); 4077 gaudi2_stop_rot_qmans(hdev); 4078 gaudi2_stop_nic_qmans(hdev); 4079 msleep(wait_timeout_ms); 4080 4081 gaudi2_halt_arcs(hdev); 4082 gaudi2_dma_stall(hdev); 4083 gaudi2_mme_stall(hdev); 4084 gaudi2_tpc_stall(hdev); 4085 gaudi2_rotator_stall(hdev); 4086 4087 msleep(wait_timeout_ms); 4088 4089 gaudi2_stop_dec(hdev); 4090 4091 /* 4092 * in case of soft reset do a manual flush for QMANs (currently called 4093 * only for NIC QMANs 4094 */ 4095 if (!hard_reset) 4096 gaudi2_nic_qmans_manual_flush(hdev); 4097 4098 gaudi2_disable_dma_qmans(hdev); 4099 gaudi2_disable_mme_qmans(hdev); 4100 gaudi2_disable_tpc_qmans(hdev); 4101 gaudi2_disable_rot_qmans(hdev); 4102 gaudi2_disable_nic_qmans(hdev); 4103 gaudi2_disable_timestamp(hdev); 4104 4105 skip_engines: 4106 if (hard_reset) { 4107 gaudi2_disable_msix(hdev); 4108 return; 4109 } 4110 4111 gaudi2_sync_irqs(hdev); 4112 } 4113 4114 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) 4115 { 4116 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 4117 4118 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 4119 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 4120 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 4121 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 4122 
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 4123 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; 4124 } 4125 4126 static void gaudi2_init_firmware_loader(struct hl_device *hdev) 4127 { 4128 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 4129 struct dynamic_fw_load_mgr *dynamic_loader; 4130 struct cpu_dyn_regs *dyn_regs; 4131 4132 /* fill common fields */ 4133 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 4134 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE; 4135 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE; 4136 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC; 4137 fw_loader->skip_bmc = false; 4138 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; 4139 fw_loader->dram_bar_id = DRAM_BAR_ID; 4140 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; 4141 4142 /* here we update initial values for few specific dynamic regs (as 4143 * before reading the first descriptor from FW those value has to be 4144 * hard-coded). in later stages of the protocol those values will be 4145 * updated automatically by reading the FW descriptor so data there 4146 * will always be up-to-date 4147 */ 4148 dynamic_loader = &hdev->fw_loader.dynamic_loader; 4149 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 4150 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 4151 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 4152 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC; 4153 } 4154 4155 static int gaudi2_init_cpu(struct hl_device *hdev) 4156 { 4157 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4158 int rc; 4159 4160 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 4161 return 0; 4162 4163 if (gaudi2->hw_cap_initialized & HW_CAP_CPU) 4164 return 0; 4165 4166 rc = hl_fw_init_cpu(hdev); 4167 if (rc) 4168 return rc; 4169 4170 gaudi2->hw_cap_initialized |= HW_CAP_CPU; 4171 4172 return 0; 4173 } 4174 4175 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 4176 { 4177 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 4178 struct asic_fixed_properties *prop = &hdev->asic_prop; 4179 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4180 struct cpu_dyn_regs *dyn_regs; 4181 struct hl_eq *eq; 4182 u32 status; 4183 int err; 4184 4185 if (!hdev->cpu_queues_enable) 4186 return 0; 4187 4188 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 4189 return 0; 4190 4191 eq = &hdev->event_queue; 4192 4193 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4194 4195 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 4196 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 4197 4198 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 4199 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 4200 4201 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address)); 4202 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address)); 4203 4204 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 4205 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 4206 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 4207 4208 /* Used for EQ CI */ 4209 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 4210 4211 WREG32(mmCPU_IF_PF_PQ_PI, 0); 4212 4213 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 4214 4215 /* Let the ARC know we are ready as it is now handling those queues */ 4216 4217 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 4218 
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 4219 4220 err = hl_poll_timeout( 4221 hdev, 4222 mmCPU_IF_QUEUE_INIT, 4223 status, 4224 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4225 1000, 4226 cpu_timeout); 4227 4228 if (err) { 4229 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n"); 4230 return -EIO; 4231 } 4232 4233 /* update FW application security bits */ 4234 if (prop->fw_cpu_boot_dev_sts0_valid) 4235 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4236 4237 if (prop->fw_cpu_boot_dev_sts1_valid) 4238 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4239 4240 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4241 return 0; 4242 } 4243 4244 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4245 u32 queue_id_base) 4246 { 4247 struct hl_hw_queue *q; 4248 u32 pq_id, pq_offset; 4249 4250 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4251 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4252 pq_offset = pq_id * 4; 4253 4254 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4255 lower_32_bits(q->bus_address)); 4256 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4257 upper_32_bits(q->bus_address)); 4258 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4259 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4260 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4261 } 4262 } 4263 4264 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4265 { 4266 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4267 4268 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4269 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4270 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4271 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4272 4273 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4274 cp_offset = cp_id * 4; 4275 4276 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4277 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4278 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4279 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4280 } 4281 4282 /* allow QMANs to accept work from ARC CQF */ 4283 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4284 } 4285 4286 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4287 u32 queue_id_base) 4288 { 4289 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4290 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4291 4292 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4293 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4294 4295 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4296 pq_offset = pq_id * 4; 4297 4298 /* Configure QMAN HBW to scratchpad as it is not needed */ 4299 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4300 lower_32_bits(gaudi2->scratchpad_bus_address)); 4301 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4302 upper_32_bits(gaudi2->scratchpad_bus_address)); 4303 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4304 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4305 4306 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4307 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4308 
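		/*
		 * Each PQ owns a private copy of every *_0 register at a
		 * 4-byte stride, hence pq_offset = pq_id * 4. For PQ 2, for
		 * instance, the writes in this iteration land on
		 * QM_PQC_HBW_BASE_LO_0_OFFSET + 8, QM_PQC_LBW_WDATA_0_OFFSET + 8
		 * and so on. The QM_PQ_* and QM_CP_MSG_* registers programmed
		 * in the helpers above follow the same convention.
		 */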
WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4309 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4310 } 4311 4312 /* Enable QMAN H/W completion */ 4313 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 4314 } 4315 4316 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4317 { 4318 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4319 u32 sp_reg_addr; 4320 4321 switch (queue_id_base) { 4322 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4323 fallthrough; 4324 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4325 fallthrough; 4326 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4327 fallthrough; 4328 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4329 fallthrough; 4330 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4331 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4332 break; 4333 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4334 fallthrough; 4335 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4336 fallthrough; 4337 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4338 fallthrough; 4339 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4340 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4341 break; 4342 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4343 fallthrough; 4344 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4345 fallthrough; 4346 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4347 fallthrough; 4348 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4349 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4350 break; 4351 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4352 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4353 break; 4354 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4355 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4356 break; 4357 default: 4358 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4359 return 0; 4360 } 4361 4362 return sp_reg_addr; 4363 } 4364 4365 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4366 u32 queue_id_base) 4367 { 4368 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4369 int map_table_entry; 4370 4371 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4372 4373 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4374 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4375 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4376 4377 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4378 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4379 gaudi2_irq_map_table[map_table_entry].cpu_id); 4380 4381 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4382 4383 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4384 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4385 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4386 4387 /* Enable the QMAN channel. 4388 * PDMA QMAN configuration is different, as we do not allow user to 4389 * access some of the CPs. 4390 * PDMA0: CP2/3 are reserved for the ARC usage. 4391 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
4392 */ 4393 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4394 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4395 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4396 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4397 else 4398 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4399 } 4400 4401 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4402 u32 queue_id_base) 4403 { 4404 u32 pq_id; 4405 4406 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4407 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4408 4409 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4410 gaudi2_init_qman_cp(hdev, reg_base); 4411 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4412 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4413 } 4414 4415 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 4416 u32 dma_core_id, bool is_secure) 4417 { 4418 u32 prot, irq_handler_offset; 4419 struct cpu_dyn_regs *dyn_regs; 4420 int map_table_entry; 4421 4422 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 4423 if (is_secure) 4424 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 4425 4426 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 4427 4428 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4429 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 4430 4431 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 4432 lower_32_bits(CFG_BASE + irq_handler_offset)); 4433 4434 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 4435 upper_32_bits(CFG_BASE + irq_handler_offset)); 4436 4437 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 4438 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 4439 gaudi2_irq_map_table[map_table_entry].cpu_id); 4440 4441 /* Enable the DMA channel */ 4442 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 4443 } 4444 4445 static void gaudi2_init_kdma(struct hl_device *hdev) 4446 { 4447 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4448 u32 reg_base; 4449 4450 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 4451 return; 4452 4453 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 4454 4455 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 4456 4457 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 4458 } 4459 4460 static void gaudi2_init_pdma(struct hl_device *hdev) 4461 { 4462 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4463 u32 reg_base; 4464 4465 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 4466 return; 4467 4468 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 4469 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 4470 4471 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 4472 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 4473 4474 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 4475 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 4476 4477 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 4478 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 4479 4480 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 4481 } 4482 4483 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 4484 { 4485 u32 reg_base, base_edma_core_id, base_edma_qman_id; 4486 4487 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 4488 base_edma_qman_id = edma_stream_base[seq]; 4489 4490 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 4491 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 4492 4493 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 4494 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 4495 } 4496 4497 static void gaudi2_init_edma(struct hl_device *hdev) 4498 { 4499 struct asic_fixed_properties *prop = &hdev->asic_prop; 4500 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4501 int dcore, inst; 4502 4503 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 4504 return; 4505 4506 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 4507 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 4508 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 4509 4510 if (!(prop->edma_enabled_mask & BIT(seq))) 4511 continue; 4512 4513 gaudi2_init_edma_instance(hdev, seq); 4514 4515 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 4516 } 4517 } 4518 } 4519 4520 /* 4521 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 4522 * @hdev: pointer to habanalabs device structure. 4523 * @sob_id: sync object ID. 4524 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 4525 * @interrupt_id: interrupt ID. 4526 * 4527 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 4528 * write directly to the HBW host memory of the virtual MSI-X doorbell. 4529 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 4530 * 4531 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 4532 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 4533 * completion, by decrementing the sync object value and re-arming the monitor. 4534 */ 4535 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 4536 u32 first_mon_id, u32 interrupt_id) 4537 { 4538 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 4539 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4540 u64 addr; 4541 u8 mask; 4542 4543 /* Reset the SOB value */ 4544 sob_offset = sob_id * sizeof(u32); 4545 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 4546 4547 /* Configure 3 monitors: 4548 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 4549 * 2. Decrement SOB value by 1. 4550 * 3. Re-arm the master monitor. 
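	 *
	 * For example, for decoder N the caller below
	 * (gaudi2_prepare_sm_for_virt_msix_db) passes, on the normal
	 * interrupt path:
	 *
	 *	sob_id       = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + N
	 *	first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * N
	 *	interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * N
	 *
	 * and the abnormal path uses the ABNRM SOB/monitor ranges with
	 * interrupt_id + 1, so each decoder consumes one SOB and three
	 * monitors per interrupt type.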
4551 */ 4552 4553 first_mon_offset = first_mon_id * sizeof(u32); 4554 4555 /* 2nd monitor: Decrement SOB value by 1 */ 4556 mon_offset = first_mon_offset + sizeof(u32); 4557 4558 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 4559 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4560 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4561 4562 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 4563 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 4564 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 4565 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4566 4567 /* 3rd monitor: Re-arm the master monitor */ 4568 mon_offset = first_mon_offset + 2 * sizeof(u32); 4569 4570 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 4571 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4572 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4573 4574 sob_group = sob_id / 8; 4575 mask = ~BIT(sob_id & 0x7); 4576 mode = 0; /* comparison mode is "greater than or equal to" */ 4577 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 4578 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 4579 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 4580 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 4581 4582 payload = arm; 4583 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4584 4585 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 4586 mon_offset = first_mon_offset; 4587 4588 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 4589 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 4590 4591 addr = gaudi2->virt_msix_db_dma_addr; 4592 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4593 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4594 4595 payload = interrupt_id; 4596 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4597 4598 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 4599 } 4600 4601 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 4602 { 4603 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 4604 struct asic_fixed_properties *prop = &hdev->asic_prop; 4605 4606 /* Decoder normal/abnormal interrupts */ 4607 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 4608 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 4609 continue; 4610 4611 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4612 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 4613 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 4614 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4615 4616 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4617 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 4618 interrupt_id += 1; 4619 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4620 } 4621 } 4622 4623 static void gaudi2_init_sm(struct hl_device *hdev) 4624 { 4625 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4626 u64 cq_address; 4627 u32 reg_val; 4628 int i; 4629 4630 /* Enable HBW/LBW CQ for completion monitors */ 4631 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4632 
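	/* The combined HBW+LBW value is written below to the first GAUDI2_MAX_PENDING_CS
	 * monitors; the KDMA completion monitor that follows is configured with the HBW
	 * CQ enable only.
	 */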
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 4633 4634 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 4635 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4636 4637 /* Enable only HBW CQ for KDMA completion monitor */ 4638 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4639 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4640 4641 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 4642 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 4643 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 4644 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 4645 4646 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 4647 cq_address = 4648 hdev->completion_queue[i].bus_address; 4649 4650 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 4651 lower_32_bits(cq_address)); 4652 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 4653 upper_32_bits(cq_address)); 4654 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 4655 ilog2(HL_CQ_SIZE_IN_BYTES)); 4656 } 4657 4658 /* Configure kernel ASID and MMU BP*/ 4659 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 4660 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 4661 4662 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 4663 gaudi2_prepare_sm_for_virt_msix_db(hdev); 4664 } 4665 4666 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 4667 { 4668 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4669 u32 reg_val; 4670 int i; 4671 4672 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 4673 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 4674 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 4675 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 4676 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 4677 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 4678 4679 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 4680 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 4681 4682 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 4683 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 4684 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 4685 } 4686 } 4687 4688 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 4689 bool config_qman_only) 4690 { 4691 u32 queue_id_base, reg_base; 4692 4693 switch (dcore_id) { 4694 case 0: 4695 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 4696 break; 4697 case 1: 4698 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 4699 break; 4700 case 2: 4701 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 4702 break; 4703 case 3: 4704 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 4705 break; 4706 default: 4707 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 4708 return; 4709 } 4710 4711 if (!config_qman_only) { 4712 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 4713 gaudi2_init_mme_acc(hdev, reg_base); 4714 } 4715 4716 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 4717 gaudi2_init_qman(hdev, reg_base, queue_id_base); 4718 } 4719 4720 static void gaudi2_init_mme(struct hl_device *hdev) 4721 { 4722 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4723 int i; 4724 4725 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 4726 return; 4727 4728 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 4729 gaudi2_init_dcore_mme(hdev, i, false); 4730 4731 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 4732 } 4733 } 4734 4735 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 4736 { 4737 /* Mask arithmetic and QM interrupts in TPC */ 4738 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 4739 4740 /* Set 16 cache lines */ 4741 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 4742 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 4743 } 4744 4745 struct gaudi2_tpc_init_cfg_data { 4746 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 4747 }; 4748 4749 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 4750 u32 offset, struct iterate_module_ctx *ctx) 4751 { 4752 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4753 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 4754 u32 queue_id_base; 4755 u8 seq; 4756 4757 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 4758 4759 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 4760 /* gets last sequence number */ 4761 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 4762 else 4763 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 4764 4765 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 4766 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 4767 4768 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 4769 } 4770 4771 static void gaudi2_init_tpc(struct hl_device *hdev) 4772 { 4773 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4774 struct gaudi2_tpc_init_cfg_data init_cfg_data; 4775 struct iterate_module_ctx tpc_iter; 4776 4777 if (!hdev->asic_prop.tpc_enabled_mask) 4778 return; 4779 4780 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 4781 return; 4782 4783 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 4784 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 4785 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 4786 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 4787 tpc_iter.fn = &gaudi2_init_tpc_config; 4788 tpc_iter.data = &init_cfg_data; 4789 gaudi2_iterate_tpcs(hdev, &tpc_iter); 4790 } 4791 4792 static void gaudi2_init_rotator(struct hl_device *hdev) 4793 { 4794 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4795 u32 i, reg_base, queue_id; 4796 4797 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 4798 4799 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4800 reg_base = gaudi2_qm_blocks_bases[queue_id]; 4801 gaudi2_init_qman(hdev, reg_base, queue_id); 4802 4803 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 4804 } 4805 } 4806 4807 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 4808 { 4809 u32 sob_id; 4810 4811 /* VCMD normal interrupt */ 4812 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4813 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 4814 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4815 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4816 4817 /* VCMD abnormal interrupt */ 4818 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4819 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 4820 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4821 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4822 } 4823 4824 static void gaudi2_init_dec(struct hl_device *hdev) 4825 
{ 4826 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4827 u32 dcore_id, dec_id, dec_bit; 4828 u64 base_addr; 4829 4830 if (!hdev->asic_prop.decoder_enabled_mask) 4831 return; 4832 4833 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 4834 return; 4835 4836 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4837 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4838 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4839 4840 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4841 continue; 4842 4843 base_addr = mmDCORE0_DEC0_CMD_BASE + 4844 BRDG_CTRL_BLOCK_OFFSET + 4845 dcore_id * DCORE_OFFSET + 4846 dec_id * DCORE_VDEC_OFFSET; 4847 4848 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4849 4850 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4851 } 4852 4853 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 4854 dec_bit = PCIE_DEC_SHIFT + dec_id; 4855 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4856 continue; 4857 4858 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 4859 dec_id * DCORE_VDEC_OFFSET; 4860 4861 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4862 4863 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4864 } 4865 } 4866 4867 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 4868 u32 stlb_base, u32 asid, u64 phys_addr) 4869 { 4870 u32 status, timeout_usec; 4871 int rc; 4872 4873 if (hdev->pldm || !hdev->pdev) 4874 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 4875 else 4876 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 4877 4878 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 4879 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 4880 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 4881 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 4882 4883 rc = hl_poll_timeout( 4884 hdev, 4885 stlb_base + STLB_BUSY_OFFSET, 4886 status, 4887 !(status & 0x80000000), 4888 1000, 4889 timeout_usec); 4890 4891 if (rc) { 4892 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 4893 return rc; 4894 } 4895 4896 return 0; 4897 } 4898 4899 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 4900 u32 start_offset, u32 inv_start_val, 4901 u32 flags) 4902 { 4903 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 4904 if (flags & MMU_OP_CLEAR_MEMCACHE) 4905 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 4906 4907 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 4908 return; 4909 4910 WREG32(stlb_base + start_offset, inv_start_val); 4911 } 4912 4913 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 4914 struct gaudi2_cache_invld_params *inv_params) 4915 { 4916 u32 status, timeout_usec, start_offset; 4917 int rc; 4918 4919 timeout_usec = (hdev->pldm) ? 
GAUDI2_PLDM_MMU_TIMEOUT_USEC : 4920 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 4921 4922 /* poll PMMU mem line cache (only needed in mmu range invalidation) */ 4923 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) { 4924 rc = hl_poll_timeout( 4925 hdev, 4926 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 4927 status, 4928 status & 0x1, 4929 1000, 4930 timeout_usec); 4931 4932 if (rc) 4933 return rc; 4934 4935 /* Need to manually reset the status to 0 */ 4936 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0); 4937 } 4938 4939 /* Lower cache does not work with cache lines, hence we can skip its 4940 * invalidation upon map and invalidate only upon unmap 4941 */ 4942 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV) 4943 return 0; 4944 4945 start_offset = inv_params->range_invalidation ? 4946 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET; 4947 4948 rc = hl_poll_timeout( 4949 hdev, 4950 stlb_base + start_offset, 4951 status, 4952 !(status & 0x1), 4953 1000, 4954 timeout_usec); 4955 4956 return rc; 4957 } 4958 4959 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id) 4960 { 4961 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4962 u32 hw_cap; 4963 4964 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id); 4965 4966 if (gaudi2->hw_cap_initialized & hw_cap) 4967 return true; 4968 4969 return false; 4970 } 4971 4972 /* this function shall be called only for HMMUs for which capability bit is set */ 4973 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id) 4974 { 4975 u32 offset; 4976 4977 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 4978 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset); 4979 } 4980 4981 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base, 4982 struct gaudi2_cache_invld_params *inv_params) 4983 { 4984 u32 start_offset; 4985 4986 if (inv_params->range_invalidation) { 4987 /* Set the addresses range 4988 * Note: that the start address we set in register, is not included in 4989 * the range of the invalidation, by design. 4990 * that's why we need to set lower address than the one we actually 4991 * want to be included in the range invalidation. 
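 *
 * Hence the register is programmed below with (start_va - 1), so that start_va
 * itself is the first address covered by the invalidation.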
4992 */ 4993 u64 start = inv_params->start_va - 1; 4994 4995 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 4996 4997 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 4998 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 4999 5000 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 5001 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 5002 5003 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 5004 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 5005 5006 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 5007 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 5008 } else { 5009 start_offset = STLB_INV_ALL_START_OFFSET; 5010 } 5011 5012 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 5013 inv_params->inv_start_val, inv_params->flags); 5014 } 5015 5016 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 5017 int dcore_id, int hmmu_id, 5018 struct gaudi2_cache_invld_params *inv_params) 5019 { 5020 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5021 5022 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 5023 } 5024 5025 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 5026 int dcore_id, int hmmu_id, 5027 struct gaudi2_cache_invld_params *inv_params) 5028 { 5029 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5030 5031 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 5032 } 5033 5034 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 5035 struct gaudi2_cache_invld_params *inv_params) 5036 { 5037 int dcore_id, hmmu_id; 5038 5039 /* first send all invalidation commands */ 5040 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5041 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5042 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5043 continue; 5044 5045 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 5046 } 5047 } 5048 5049 /* next, poll all invalidations status */ 5050 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5051 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5052 int rc; 5053 5054 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5055 continue; 5056 5057 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 5058 inv_params); 5059 if (rc) 5060 return rc; 5061 } 5062 } 5063 5064 return 0; 5065 } 5066 5067 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 5068 { 5069 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5070 struct gaudi2_cache_invld_params invld_params; 5071 int rc = 0; 5072 5073 if (hdev->reset_info.hard_reset_pending) 5074 return rc; 5075 5076 invld_params.range_invalidation = false; 5077 invld_params.inv_start_val = 1; 5078 5079 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5080 invld_params.flags = flags; 5081 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5082 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5083 &invld_params); 5084 } else if (flags & MMU_OP_PHYS_PACK) { 5085 invld_params.flags = 0; 5086 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5087 } 5088 5089 return rc; 5090 } 5091 5092 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 5093 u32 flags, u32 asid, u64 va, u64 size) 5094 { 5095 struct gaudi2_cache_invld_params invld_params = {0}; 5096 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5097 u64 start_va, end_va; 5098 u32 
inv_start_val; 5099 int rc = 0; 5100 5101 if (hdev->reset_info.hard_reset_pending) 5102 return 0; 5103 5104 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 5105 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 5106 asid << MMU_RANGE_INV_ASID_SHIFT); 5107 start_va = va; 5108 end_va = start_va + size; 5109 5110 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5111 /* As range invalidation does not support zero address we will 5112 * do full invalidation in this case 5113 */ 5114 if (start_va) { 5115 invld_params.range_invalidation = true; 5116 invld_params.start_va = start_va; 5117 invld_params.end_va = end_va; 5118 invld_params.inv_start_val = inv_start_val; 5119 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 5120 } else { 5121 invld_params.range_invalidation = false; 5122 invld_params.inv_start_val = 1; 5123 invld_params.flags = flags; 5124 } 5125 5126 5127 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5128 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5129 &invld_params); 5130 if (rc) 5131 return rc; 5132 5133 } else if (flags & MMU_OP_PHYS_PACK) { 5134 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 5135 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 5136 invld_params.inv_start_val = inv_start_val; 5137 invld_params.flags = flags; 5138 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5139 } 5140 5141 return rc; 5142 } 5143 5144 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 5145 { 5146 struct asic_fixed_properties *prop = &hdev->asic_prop; 5147 u64 hop0_addr; 5148 u32 asid, max_asid = prop->max_asid; 5149 int rc; 5150 5151 /* it takes too much time to init all of the ASIDs on palladium */ 5152 if (hdev->pldm) 5153 max_asid = min((u32) 8, max_asid); 5154 5155 for (asid = 0 ; asid < max_asid ; asid++) { 5156 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 5157 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 5158 if (rc) { 5159 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 5160 return rc; 5161 } 5162 } 5163 5164 return 0; 5165 } 5166 5167 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5168 { 5169 u32 status, timeout_usec; 5170 int rc; 5171 5172 if (hdev->pldm || !hdev->pdev) 5173 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5174 else 5175 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5176 5177 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5178 5179 rc = hl_poll_timeout( 5180 hdev, 5181 stlb_base + STLB_SRAM_INIT_OFFSET, 5182 status, 5183 !status, 5184 1000, 5185 timeout_usec); 5186 5187 if (rc) 5188 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5189 5190 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5191 if (rc) 5192 return rc; 5193 5194 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5195 5196 rc = hl_poll_timeout( 5197 hdev, 5198 stlb_base + STLB_INV_ALL_START_OFFSET, 5199 status, 5200 !status, 5201 1000, 5202 timeout_usec); 5203 5204 if (rc) 5205 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5206 5207 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5208 5209 return rc; 5210 } 5211 5212 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5213 { 5214 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5215 u32 mmu_base, stlb_base; 5216 int rc; 5217 5218 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5219 return 0; 5220 5221 mmu_base = mmPMMU_HBW_MMU_BASE; 5222 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5223 5224 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5225 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5226 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5227 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5228 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5229 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5230 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5231 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5232 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5233 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5234 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5235 5236 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5237 5238 if (PAGE_SIZE == SZ_64K) { 5239 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5240 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5241 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5242 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5243 FIELD_PREP( 5244 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5245 1), 5246 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5247 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5248 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5249 } 5250 5251 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5252 5253 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5254 if (rc) 5255 return rc; 5256 5257 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5258 5259 return 0; 5260 } 5261 5262 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5263 int hmmu_id) 5264 { 5265 struct asic_fixed_properties *prop = &hdev->asic_prop; 5266 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5267 u32 offset, mmu_base, stlb_base, hw_cap; 5268 u8 dmmu_seq; 5269 int rc; 5270 5271 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5272 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5273 5274 /* 5275 * return if DMMU is already initialized or if it's not out of 5276 * isolation (due to cluster binning) 5277 */ 5278 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5279 return 0; 5280 5281 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5282 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5283 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5284 5285 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5286 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5287 5288 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5289 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5290 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5291 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5292 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5293 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5294 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5295 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5296 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5297 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5298 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5299 5300 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5301 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5302 5303 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5304 5305 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5306 if (rc) 5307 return rc; 5308 5309 gaudi2->hw_cap_initialized |= hw_cap; 5310 5311 return 0; 5312 } 5313 5314 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5315 { 5316 int rc, dcore_id, hmmu_id; 5317 5318 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5319 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5320 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5321 if (rc) 5322 return rc; 5323 } 5324 5325 return 0; 5326 } 5327 5328 static int gaudi2_mmu_init(struct hl_device *hdev) 5329 { 5330 int rc; 5331 5332 rc = gaudi2_pci_mmu_init(hdev); 5333 if (rc) 5334 return rc; 5335 5336 rc = gaudi2_hbm_mmu_init(hdev); 5337 if (rc) 5338 return rc; 5339 5340 return 0; 5341 } 5342 5343 static int gaudi2_hw_init(struct hl_device *hdev) 5344 { 5345 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5346 int rc; 5347 5348 /* Let's mark in the H/W that we have reached this point. We check 5349 * this value in the reset_before_init function to understand whether 5350 * we need to reset the chip before doing H/W init. This register is 5351 * cleared by the H/W upon H/W reset 5352 */ 5353 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5354 5355 /* Perform read from the device to make sure device is up */ 5356 RREG32(mmHW_STATE); 5357 5358 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5359 * So we set it here and if anyone tries to move it later to 5360 * a different address, there will be an error 5361 */ 5362 if (hdev->asic_prop.iatu_done_by_fw) 5363 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5364 5365 /* 5366 * Before pushing u-boot/linux to device, need to set the hbm bar to 5367 * base address of dram 5368 */ 5369 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5370 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5371 return -EIO; 5372 } 5373 5374 rc = gaudi2_init_cpu(hdev); 5375 if (rc) { 5376 dev_err(hdev->dev, "failed to initialize CPU\n"); 5377 return rc; 5378 } 5379 5380 gaudi2_init_scrambler_hbm(hdev); 5381 gaudi2_init_kdma(hdev); 5382 5383 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5384 if (rc) { 5385 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5386 return rc; 5387 } 5388 5389 rc = gaudi2->cpucp_info_get(hdev); 5390 if (rc) { 5391 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5392 return rc; 5393 } 5394 5395 rc = gaudi2_mmu_init(hdev); 5396 if (rc) 5397 return rc; 5398 5399 gaudi2_init_pdma(hdev); 5400 gaudi2_init_edma(hdev); 5401 gaudi2_init_sm(hdev); 5402 gaudi2_init_tpc(hdev); 5403 gaudi2_init_mme(hdev); 5404 gaudi2_init_rotator(hdev); 5405 gaudi2_init_dec(hdev); 5406 gaudi2_enable_timestamp(hdev); 5407 5408 rc = gaudi2_coresight_init(hdev); 5409 if (rc) 5410 goto disable_queues; 5411 5412 rc = gaudi2_enable_msix(hdev); 5413 if (rc) 5414 goto disable_queues; 5415 5416 /* Perform read from the device to flush all configuration */ 5417 RREG32(mmHW_STATE); 5418 5419 return 0; 5420 5421 disable_queues: 5422 gaudi2_disable_dma_qmans(hdev); 5423 gaudi2_disable_mme_qmans(hdev); 5424 gaudi2_disable_tpc_qmans(hdev); 5425 gaudi2_disable_rot_qmans(hdev); 5426 gaudi2_disable_nic_qmans(hdev); 5427 5428 gaudi2_disable_timestamp(hdev); 5429 5430 return rc; 5431 } 5432 5433 /** 5434 * gaudi2_send_hard_reset_cmd - common function to handle reset 5435 * 5436 * 
@hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible reset scenarios.
 * It considers whether the reset is handled by the driver or by the FW, and which FW
 * components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case in which the failure happened while loading the cpu
	 * management app, so the driver did not detect any failure while loading the FW.
	 * In such a scenario the driver sends only HALT_MACHINE, and no one responds to the
	 * request because the FW is already back in preboot and cannot handle the command.
	 * The next time the management app loads, it checks the events register, which still
	 * holds the halt indication, and reboots the device.
	 * The solution is to let preboot clear all relevant registers before the next boot,
	 * once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/Bootfit is loaded, this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: the FW initiates the reset sequence
	 * 2. driver reset: the FW starts the HALT sequence (the preparations for the
	 *                  reset, but not the reset itself, as it is not implemented
	 *                  on its side) and the LKD waits for the FW to complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot only (without Linux/Bootfit) we can
	 * communicate only through the COMMS commands to issue halt/reset.
	 *
	 * When working with Linux/Bootfit, this is a hail-mary attempt to revive the card
	 * in the small chance that the f/w has experienced a watchdog event, which caused
	 * it to return to preboot. In that case, triggering the reset through the GIC won't
	 * help. We need to trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, this does
	 * no damage.
5498 */ 5499 5500 if (heartbeat_reset || preboot_only || !cpu_initialized) { 5501 if (hdev->asic_prop.hard_reset_done_by_fw) 5502 hl_fw_ask_hard_reset_without_linux(hdev); 5503 else 5504 hl_fw_ask_halt_machine_without_linux(hdev); 5505 } 5506 } 5507 5508 /** 5509 * gaudi2_execute_hard_reset - execute hard reset by driver/FW 5510 * 5511 * @hdev: pointer to the habanalabs device structure 5512 * @reset_sleep_ms: sleep time in msec after reset 5513 * 5514 * This function executes hard reset based on if driver/FW should do the reset 5515 */ 5516 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms) 5517 { 5518 if (hdev->asic_prop.hard_reset_done_by_fw) { 5519 gaudi2_send_hard_reset_cmd(hdev); 5520 return; 5521 } 5522 5523 /* Set device to handle FLR by H/W as we will put the device 5524 * CPU to halt mode 5525 */ 5526 WREG32(mmPCIE_AUX_FLR_CTRL, 5527 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 5528 5529 gaudi2_send_hard_reset_cmd(hdev); 5530 5531 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); 5532 } 5533 5534 /** 5535 * gaudi2_execute_soft_reset - execute soft reset by driver/FW 5536 * 5537 * @hdev: pointer to the habanalabs device structure 5538 * @reset_sleep_ms: sleep time in msec after reset 5539 * @driver_performs_reset: true if driver should perform reset instead of f/w. 5540 * 5541 * This function executes soft reset based on if driver/FW should do the reset 5542 */ 5543 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms, 5544 bool driver_performs_reset) 5545 { 5546 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5547 5548 if (!driver_performs_reset) { 5549 /* set SP to indicate reset request sent to FW */ 5550 if (dyn_regs->cpu_rst_status) 5551 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 5552 else 5553 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 5554 5555 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 5556 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); 5557 return; 5558 } 5559 5560 /* Block access to engines, QMANs and SM during reset, these 5561 * RRs will be reconfigured after soft reset. 5562 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset. 
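 *
 * This is done below using the last two long LBW RR entries: one covering the range
 * up to the PCIE_MSIX block and one covering the range right after it, so only the
 * PCIE_MSIX block itself remains open.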
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
					mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
					mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
}

static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
					u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	/* Without this sleep, the reset will not take effect */
	msleep(reset_sleep_ms);

	/* We poll the BTM done indication multiple times after reset due to
	 * HW errata 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}

static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
			reg_val);
}

static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;

	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
	} else {
		/*
		 * As we also have to support working with preboot only (which does not support
		 * soft reset), we have to make sure that security is disabled before letting
		 * the driver do the reset. The user shall control the BFE flags to avoid
		 * requesting a soft reset on a secured device that runs preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
						!hdev->asic_prop.fw_security_enabled);
		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
	}

skip_reset:
	if (driver_performs_reset || hard_reset)
		/*
		 * Instead of waiting for the BTM indication we should wait for preboot ready:
		 * Consider the below scenario:
		 * 1. FW update is being triggered
		 *        - setting the dirty bit
		 * 2. hard reset will be triggered due to the dirty bit
		 * 3. FW initiates the reset:
		 *        - dirty bit cleared
		 *        - BTM indication cleared
		 *        - preboot ready indication cleared
		 * 4. during hard reset:
		 *        - BTM indication will be set
		 *        - BIST test performed and another reset triggered
		 * 5.
only after this reset the preboot will set the preboot ready 5669 * 5670 * when polling on BTM indication alone we can lose sync with FW while trying to 5671 * communicate with FW that is during reset. 5672 * to overcome this we will always wait to preboot ready indication 5673 */ 5674 if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { 5675 msleep(reset_sleep_ms); 5676 hl_fw_wait_preboot_ready(hdev); 5677 } else { 5678 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); 5679 } 5680 else 5681 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 5682 5683 if (!gaudi2) 5684 return; 5685 5686 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 5687 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 5688 5689 /* 5690 * Clear NIC capability mask in order for driver to re-configure 5691 * NIC QMANs. NIC ports will not be re-configured during soft 5692 * reset as we call gaudi2_nic_init only during hard reset 5693 */ 5694 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 5695 5696 if (hard_reset) { 5697 gaudi2->hw_cap_initialized &= 5698 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 5699 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 5700 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 5701 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 5702 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 5703 5704 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 5705 } else { 5706 gaudi2->hw_cap_initialized &= 5707 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 5708 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 5709 HW_CAP_ROT_MASK); 5710 } 5711 } 5712 5713 static int gaudi2_suspend(struct hl_device *hdev) 5714 { 5715 int rc; 5716 5717 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 5718 if (rc) 5719 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 5720 5721 return rc; 5722 } 5723 5724 static int gaudi2_resume(struct hl_device *hdev) 5725 { 5726 return gaudi2_init_iatu(hdev); 5727 } 5728 5729 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 5730 void *cpu_addr, dma_addr_t dma_addr, size_t size) 5731 { 5732 int rc; 5733 5734 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 5735 VM_DONTCOPY | VM_NORESERVE); 5736 5737 #ifdef _HAS_DMA_MMAP_COHERENT 5738 5739 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 5740 if (rc) 5741 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 5742 5743 #else 5744 5745 rc = remap_pfn_range(vma, vma->vm_start, 5746 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 5747 size, vma->vm_page_prot); 5748 if (rc) 5749 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 5750 5751 #endif 5752 5753 return rc; 5754 } 5755 5756 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 5757 { 5758 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5759 u64 hw_cap_mask = 0; 5760 u64 hw_tpc_cap_bit = 0; 5761 u64 hw_nic_cap_bit = 0; 5762 u64 hw_test_cap_bit = 0; 5763 5764 switch (hw_queue_id) { 5765 case GAUDI2_QUEUE_ID_PDMA_0_0: 5766 case GAUDI2_QUEUE_ID_PDMA_0_1: 5767 case GAUDI2_QUEUE_ID_PDMA_1_0: 5768 hw_cap_mask = HW_CAP_PDMA_MASK; 5769 break; 5770 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 5771 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 5772 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 5773 break; 5774 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 5775 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 5776 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) 
>> 2); 5777 break; 5778 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 5779 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 5780 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2); 5781 break; 5782 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 5783 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 5784 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 5785 break; 5786 5787 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 5788 hw_test_cap_bit = HW_CAP_MME_SHIFT; 5789 break; 5790 5791 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 5792 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 5793 break; 5794 5795 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 5796 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 5797 break; 5798 5799 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 5800 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 5801 break; 5802 5803 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 5804 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 5805 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 5806 5807 /* special case where cap bit refers to the first queue id */ 5808 if (!hw_tpc_cap_bit) 5809 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 5810 break; 5811 5812 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 5813 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 5814 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 5815 break; 5816 5817 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 5818 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 5819 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 5820 break; 5821 5822 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 5823 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 5824 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 5825 break; 5826 5827 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 5828 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 5829 break; 5830 5831 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 5832 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 5833 break; 5834 5835 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 5836 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 5837 5838 /* special case where cap bit refers to the first queue id */ 5839 if (!hw_nic_cap_bit) 5840 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 5841 break; 5842 5843 case GAUDI2_QUEUE_ID_CPU_PQ: 5844 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 5845 5846 default: 5847 return false; 5848 } 5849 5850 if (hw_tpc_cap_bit) 5851 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 5852 5853 if (hw_nic_cap_bit) 5854 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 5855 5856 if (hw_test_cap_bit) 5857 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 5858 5859 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 5860 } 5861 5862 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 5863 { 5864 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5865 5866 switch (arc_id) { 5867 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 5868 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5869 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 5870 5871 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5872 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5873 5874 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5875 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5876 5877 default: 5878 return false; 5879 } 5880 } 5881 5882 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5883 { 5884 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5885 5886 switch (arc_id) { 5887 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5888 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5889 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 5890 break; 5891 5892 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5893 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5894 break; 5895 5896 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5897 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5898 break; 5899 5900 default: 5901 return; 5902 } 5903 } 5904 5905 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5906 { 5907 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5908 5909 switch (arc_id) { 5910 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5911 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5912 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 5913 break; 5914 5915 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5916 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 5917 break; 5918 5919 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5920 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 5921 break; 5922 5923 default: 5924 return; 5925 } 5926 } 5927 5928 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 5929 { 5930 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5931 u32 pq_offset, reg_base, db_reg_offset, db_value; 5932 5933 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 5934 /* 5935 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 5936 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 5937 * number. 
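 *
 * For example, a queue whose internal PQ number is 2 gets pq_offset = 2 * 4 = 8,
 * i.e. its PI is written to the third PQ_PI register of the QMAN block.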
5938 */ 5939 pq_offset = (hw_queue_id & 0x3) * 4; 5940 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 5941 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 5942 } else { 5943 db_reg_offset = mmCPU_IF_PF_PQ_PI; 5944 } 5945 5946 db_value = pi; 5947 5948 /* ring the doorbell */ 5949 WREG32(db_reg_offset, db_value); 5950 5951 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 5952 /* make sure device CPU will read latest data from host */ 5953 mb(); 5954 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 5955 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 5956 } 5957 } 5958 5959 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 5960 { 5961 __le64 *pbd = (__le64 *) bd; 5962 5963 /* The QMANs are on the host memory so a simple copy suffice */ 5964 pqe[0] = pbd[0]; 5965 pqe[1] = pbd[1]; 5966 } 5967 5968 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 5969 dma_addr_t *dma_handle, gfp_t flags) 5970 { 5971 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 5972 } 5973 5974 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 5975 void *cpu_addr, dma_addr_t dma_handle) 5976 { 5977 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 5978 } 5979 5980 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 5981 u32 timeout, u64 *result) 5982 { 5983 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5984 5985 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 5986 if (result) 5987 *result = 0; 5988 return 0; 5989 } 5990 5991 if (!timeout) 5992 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 5993 5994 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 5995 } 5996 5997 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 5998 gfp_t mem_flags, dma_addr_t *dma_handle) 5999 { 6000 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 6001 return NULL; 6002 6003 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 6004 } 6005 6006 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 6007 { 6008 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 6009 } 6010 6011 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 6012 dma_addr_t *dma_handle) 6013 { 6014 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 6015 } 6016 6017 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 6018 { 6019 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 6020 } 6021 6022 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 6023 enum dma_data_direction dir) 6024 { 6025 dma_addr_t dma_addr; 6026 6027 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 6028 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 6029 return 0; 6030 6031 return dma_addr; 6032 } 6033 6034 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 6035 enum dma_data_direction dir) 6036 { 6037 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 6038 } 6039 6040 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 6041 { 6042 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 6043 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6044 6045 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 6046 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 6047 return -EINVAL; 6048 } 6049 6050 
/* Just check if CB address is valid */ 6051 6052 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6053 parser->user_cb_size, 6054 asic_prop->sram_user_base_address, 6055 asic_prop->sram_end_address)) 6056 return 0; 6057 6058 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6059 parser->user_cb_size, 6060 asic_prop->dram_user_base_address, 6061 asic_prop->dram_end_address)) 6062 return 0; 6063 6064 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 6065 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6066 parser->user_cb_size, 6067 asic_prop->dmmu.start_addr, 6068 asic_prop->dmmu.end_addr)) 6069 return 0; 6070 6071 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 6072 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6073 parser->user_cb_size, 6074 asic_prop->pmmu.start_addr, 6075 asic_prop->pmmu.end_addr) || 6076 hl_mem_area_inside_range( 6077 (u64) (uintptr_t) parser->user_cb, 6078 parser->user_cb_size, 6079 asic_prop->pmmu_huge.start_addr, 6080 asic_prop->pmmu_huge.end_addr)) 6081 return 0; 6082 6083 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 6084 if (!hdev->pdev) 6085 return 0; 6086 6087 if (!device_iommu_mapped(&hdev->pdev->dev)) 6088 return 0; 6089 } 6090 6091 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 6092 parser->user_cb, parser->user_cb_size); 6093 6094 return -EFAULT; 6095 } 6096 6097 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 6098 { 6099 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6100 6101 if (!parser->is_kernel_allocated_cb) 6102 return gaudi2_validate_cb_address(hdev, parser); 6103 6104 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 6105 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 6106 return -EINVAL; 6107 } 6108 6109 return 0; 6110 } 6111 6112 static int gaudi2_send_heartbeat(struct hl_device *hdev) 6113 { 6114 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6115 6116 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6117 return 0; 6118 6119 return hl_fw_send_heartbeat(hdev); 6120 } 6121 6122 /* This is an internal helper function, used to update the KDMA mmu props. 6123 * Should be called with a proper kdma lock. 
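 * It programs the same ASID and MMU-bypass ("mmbp") setting into both the read and
 * the write AXUSER attributes of the KDMA context.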
6124 */ 6125 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 6126 bool mmu_bypass, u32 asid) 6127 { 6128 u32 rw_asid, rw_mmu_bp; 6129 6130 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6131 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6132 6133 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 6134 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 6135 6136 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 6137 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 6138 } 6139 6140 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 6141 u32 mon_payload, u32 sync_value) 6142 { 6143 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 6144 u8 mask; 6145 6146 sob_offset = sob_id * 4; 6147 mon_offset = mon_id * 4; 6148 6149 /* Reset the SOB value */ 6150 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 6151 6152 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 6153 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 6154 6155 /* Configure this address with CS index because CQ_EN is set */ 6156 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 6157 6158 sync_group_id = sob_id / 8; 6159 mask = ~(1 << (sob_id & 0x7)); 6160 mode = 1; /* comparison mode is "equal to" */ 6161 6162 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 6163 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 6164 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 6165 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 6166 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 6167 } 6168 6169 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6170 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6171 u64 src_addr, u64 dst_addr, 6172 u32 size, bool is_memset) 6173 { 6174 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6175 struct hl_cq_entry *cq_base; 6176 struct hl_cq *cq; 6177 u64 comp_addr; 6178 int rc; 6179 6180 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6181 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6182 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6183 6184 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6185 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6186 6187 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6188 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6189 6190 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6191 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6192 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6193 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6194 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6195 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6196 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6197 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6198 6199 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6200 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6201 6202 if (is_memset) 6203 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6204 6205 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6206 6207 /* Wait for completion */ 6208 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6209 cq_base = 
cq->kernel_address; 6210 polling_addr = (u32 *)&cq_base[cq->ci]; 6211 6212 if (hdev->pldm) 6213 /* for each 1MB 20 second of timeout */ 6214 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6215 else 6216 timeout = KDMA_TIMEOUT_USEC; 6217 6218 /* Polling */ 6219 rc = hl_poll_timeout_memory( 6220 hdev, 6221 polling_addr, 6222 status, 6223 (status == 1), 6224 1000, 6225 timeout, 6226 true); 6227 6228 *polling_addr = 0; 6229 6230 if (rc) { 6231 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6232 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6233 return rc; 6234 } 6235 6236 cq->ci = hl_cq_inc_ptr(cq->ci); 6237 6238 return 0; 6239 } 6240 6241 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6242 { 6243 u32 i; 6244 6245 for (i = 0 ; i < size ; i += sizeof(u32)) 6246 WREG32(addr + i, val); 6247 } 6248 6249 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6250 { 6251 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6252 6253 if (enable) { 6254 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6255 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6256 } else { 6257 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6258 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6259 } 6260 } 6261 6262 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) 6263 { 6264 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 6265 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6266 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; 6267 struct packet_msg_short *msg_short_pkt; 6268 dma_addr_t pkt_dma_addr; 6269 size_t pkt_size; 6270 int rc; 6271 6272 if (hdev->pldm) 6273 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6274 else 6275 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6276 6277 pkt_size = sizeof(*msg_short_pkt); 6278 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); 6279 if (!msg_short_pkt) { 6280 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", 6281 hw_queue_id); 6282 return -ENOMEM; 6283 } 6284 6285 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6286 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6287 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6288 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6289 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6290 6291 msg_short_pkt->value = cpu_to_le32(sob_val); 6292 msg_short_pkt->ctl = cpu_to_le32(tmp); 6293 6294 /* Reset the SOB value */ 6295 WREG32(sob_addr, 0); 6296 6297 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 6298 if (rc) { 6299 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", 6300 hw_queue_id); 6301 goto free_pkt; 6302 } 6303 6304 rc = hl_poll_timeout( 6305 hdev, 6306 sob_addr, 6307 tmp, 6308 (tmp == sob_val), 6309 1000, 6310 timeout_usec); 6311 6312 if (rc == -ETIMEDOUT) { 6313 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 6314 hw_queue_id, tmp); 6315 rc = -EIO; 6316 } 6317 6318 /* Reset the SOB value */ 6319 WREG32(sob_addr, 0); 6320 6321 free_pkt: 6322 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); 6323 return rc; 6324 } 6325 6326 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6327 { 6328 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6329 6330 /* 6331 * check capability here as send_cpu_message() won't update the result 6332 * value if no capability 6333 */ 6334 if 
(!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6335 return 0; 6336 6337 return hl_fw_test_cpu_queue(hdev); 6338 } 6339 6340 static int gaudi2_test_queues(struct hl_device *hdev) 6341 { 6342 int i, rc, ret_val = 0; 6343 6344 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6345 if (!gaudi2_is_queue_enabled(hdev, i)) 6346 continue; 6347 6348 gaudi2_qman_set_test_mode(hdev, i, true); 6349 rc = gaudi2_test_queue(hdev, i); 6350 gaudi2_qman_set_test_mode(hdev, i, false); 6351 6352 if (rc) { 6353 ret_val = -EINVAL; 6354 goto done; 6355 } 6356 } 6357 6358 rc = gaudi2_test_cpu_queue(hdev); 6359 if (rc) { 6360 ret_val = -EINVAL; 6361 goto done; 6362 } 6363 6364 done: 6365 return ret_val; 6366 } 6367 6368 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6369 { 6370 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6371 size_t irq_arr_size; 6372 6373 /* TODO: missing gaudi2_nic_resume. 6374 * Until implemented nic_hw_cap_initialized will remain zeroed 6375 */ 6376 gaudi2_init_arcs(hdev); 6377 gaudi2_scrub_arcs_dccm(hdev); 6378 gaudi2_init_security(hdev); 6379 6380 /* Unmask all IRQs since some could have been received during the soft reset */ 6381 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 6382 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 6383 } 6384 6385 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 6386 struct iterate_module_ctx *ctx) 6387 { 6388 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 6389 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 6390 bool is_eng_idle; 6391 int engine_idx; 6392 6393 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 6394 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 6395 else 6396 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 6397 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 6398 6399 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 6400 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 6401 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 6402 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 6403 6404 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6405 IS_TPC_IDLE(tpc_cfg_sts); 6406 *(idle_data->is_idle) &= is_eng_idle; 6407 6408 if (idle_data->mask && !is_eng_idle) 6409 set_bit(engine_idx, idle_data->mask); 6410 6411 if (idle_data->e) 6412 hl_engine_data_sprintf(idle_data->e, 6413 idle_data->tpc_fmt, dcore, inst, 6414 is_eng_idle ? 
"Y" : "N", 6415 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 6416 } 6417 6418 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6419 struct engines_data *e) 6420 { 6421 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, 6422 mme_arch_sts, dec_swreg15, dec_enabled_bit; 6423 struct asic_fixed_properties *prop = &hdev->asic_prop; 6424 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n"; 6425 unsigned long *mask = (unsigned long *) mask_arr; 6426 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n"; 6427 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 6428 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 6429 const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n"; 6430 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 6431 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 6432 bool is_idle = true, is_eng_idle; 6433 u64 offset; 6434 6435 struct gaudi2_tpc_idle_data tpc_idle_data = { 6436 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 6437 .e = e, 6438 .mask = mask, 6439 .is_idle = &is_idle, 6440 }; 6441 struct iterate_module_ctx tpc_iter = { 6442 .fn = &gaudi2_is_tpc_engine_idle, 6443 .data = &tpc_idle_data, 6444 }; 6445 6446 int engine_idx, i, j; 6447 6448 /* EDMA, Two engines per Dcore */ 6449 if (e) 6450 hl_engine_data_sprintf(e, 6451 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6452 "---- ---- ------- ------------ ----------------------\n"); 6453 6454 for (i = 0; i < NUM_OF_DCORES; i++) { 6455 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 6456 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 6457 6458 if (!(prop->edma_enabled_mask & BIT(seq))) 6459 continue; 6460 6461 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 6462 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6463 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 6464 6465 dma_core_idle_ind_mask = 6466 RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset); 6467 6468 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 6469 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 6470 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 6471 6472 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6473 IS_DMA_IDLE(dma_core_idle_ind_mask); 6474 is_idle &= is_eng_idle; 6475 6476 if (mask && !is_eng_idle) 6477 set_bit(engine_idx, mask); 6478 6479 if (e) 6480 hl_engine_data_sprintf(e, edma_fmt, i, j, 6481 is_eng_idle ? "Y" : "N", 6482 qm_glbl_sts0, 6483 dma_core_idle_ind_mask); 6484 } 6485 } 6486 6487 /* PDMA, Two engines in Full chip */ 6488 if (e) 6489 hl_engine_data_sprintf(e, 6490 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6491 "---- ------- ------------ ----------------------\n"); 6492 6493 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6494 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 6495 offset = i * PDMA_OFFSET; 6496 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset); 6497 6498 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 6499 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 6500 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 6501 6502 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6503 IS_DMA_IDLE(dma_core_idle_ind_mask); 6504 is_idle &= is_eng_idle; 6505 6506 if (mask && !is_eng_idle) 6507 set_bit(engine_idx, mask); 6508 6509 if (e) 6510 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? 
"Y" : "N", 6511 qm_glbl_sts0, dma_core_idle_ind_mask); 6512 } 6513 6514 /* NIC, twelve macros in Full chip */ 6515 if (e && hdev->nic_ports_mask) 6516 hl_engine_data_sprintf(e, 6517 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6518 "--- ------- ------------ ----------\n"); 6519 6520 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6521 if (!(i & 1)) 6522 offset = i / 2 * NIC_OFFSET; 6523 else 6524 offset += NIC_QM_OFFSET; 6525 6526 if (!(hdev->nic_ports_mask & BIT(i))) 6527 continue; 6528 6529 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 6530 6531 6532 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 6533 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 6534 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 6535 6536 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6537 is_idle &= is_eng_idle; 6538 6539 if (mask && !is_eng_idle) 6540 set_bit(engine_idx, mask); 6541 6542 if (e) 6543 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", 6544 qm_glbl_sts0, qm_cgm_sts); 6545 } 6546 6547 if (e) 6548 hl_engine_data_sprintf(e, 6549 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6550 "--- ---- ------- ------------ ---------------\n"); 6551 /* MME, one per Dcore */ 6552 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6553 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 6554 offset = i * DCORE_OFFSET; 6555 6556 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 6557 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 6558 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 6559 6560 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6561 is_idle &= is_eng_idle; 6562 6563 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 6564 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 6565 is_idle &= is_eng_idle; 6566 6567 if (e) 6568 hl_engine_data_sprintf(e, mme_fmt, i, "N", 6569 is_eng_idle ? "Y" : "N", 6570 qm_glbl_sts0, 6571 mme_arch_sts); 6572 6573 if (mask && !is_eng_idle) 6574 set_bit(engine_idx, mask); 6575 } 6576 6577 /* 6578 * TPC 6579 */ 6580 if (e && prop->tpc_enabled_mask) 6581 hl_engine_data_sprintf(e, 6582 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" 6583 "---- --- -------- ------------ ---------- ----------------------\n"); 6584 6585 gaudi2_iterate_tpcs(hdev, &tpc_iter); 6586 6587 /* Decoders, two each Dcore and two shared PCIe decoders */ 6588 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6589 hl_engine_data_sprintf(e, 6590 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 6591 "---- --- ------- ---------------\n"); 6592 6593 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6594 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 6595 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 6596 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 6597 continue; 6598 6599 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 6600 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6601 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 6602 6603 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 6604 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6605 is_idle &= is_eng_idle; 6606 6607 if (mask && !is_eng_idle) 6608 set_bit(engine_idx, mask); 6609 6610 if (e) 6611 hl_engine_data_sprintf(e, dec_fmt, i, j, 6612 is_eng_idle ? 
"Y" : "N", dec_swreg15); 6613 } 6614 } 6615 6616 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6617 hl_engine_data_sprintf(e, 6618 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 6619 "-------- ------- ---------------\n"); 6620 6621 /* Check shared(PCIe) decoders */ 6622 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 6623 dec_enabled_bit = PCIE_DEC_SHIFT + i; 6624 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 6625 continue; 6626 6627 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 6628 offset = i * DCORE_DEC_OFFSET; 6629 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 6630 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6631 is_idle &= is_eng_idle; 6632 6633 if (mask && !is_eng_idle) 6634 set_bit(engine_idx, mask); 6635 6636 if (e) 6637 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 6638 is_eng_idle ? "Y" : "N", dec_swreg15); 6639 } 6640 6641 if (e) 6642 hl_engine_data_sprintf(e, 6643 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 6644 "---- ---- ------- ------------ ---------- -------------\n"); 6645 6646 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6647 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 6648 6649 offset = i * ROT_OFFSET; 6650 6651 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 6652 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 6653 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 6654 6655 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6656 is_idle &= is_eng_idle; 6657 6658 if (mask && !is_eng_idle) 6659 set_bit(engine_idx, mask); 6660 6661 if (e) 6662 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 6663 qm_glbl_sts0, qm_cgm_sts, "-"); 6664 } 6665 6666 return is_idle; 6667 } 6668 6669 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 6670 __acquires(&gaudi2->hw_queues_lock) 6671 { 6672 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6673 6674 spin_lock(&gaudi2->hw_queues_lock); 6675 } 6676 6677 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 6678 __releases(&gaudi2->hw_queues_lock) 6679 { 6680 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6681 6682 spin_unlock(&gaudi2->hw_queues_lock); 6683 } 6684 6685 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 6686 { 6687 return hdev->pdev->device; 6688 } 6689 6690 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 6691 { 6692 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6693 6694 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6695 return 0; 6696 6697 return hl_fw_get_eeprom_data(hdev, data, max_size); 6698 } 6699 6700 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 6701 { 6702 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 6703 } 6704 6705 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 6706 { 6707 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6708 6709 if (aggregate) { 6710 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 6711 return gaudi2->events_stat_aggregate; 6712 } 6713 6714 *size = (u32) sizeof(gaudi2->events_stat); 6715 return gaudi2->events_stat; 6716 } 6717 6718 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 6719 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6720 { 6721 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 6722 dcore_vdec_id + DCORE_OFFSET * dcore_id; 6723 6724 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6725 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6726 6727 
WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6728 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6729 6730 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6731 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6732 6733 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6734 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6735 6736 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6737 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6738 } 6739 6740 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 6741 { 6742 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6743 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6744 struct asic_fixed_properties *prop = &hdev->asic_prop; 6745 u32 dcore_offset = dcore_id * DCORE_OFFSET; 6746 u32 vdec_id, i, ports_offset, reg_val; 6747 u8 edma_seq_base; 6748 6749 /* EDMA */ 6750 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 6751 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 6752 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6753 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6754 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6755 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6756 } 6757 6758 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 6759 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6760 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6761 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6762 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6763 } 6764 6765 /* Sync Mngr */ 6766 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 6767 /* 6768 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 6769 * for any access type 6770 */ 6771 if (dcore_id > 0) { 6772 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 6773 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 6774 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 6775 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 6776 } 6777 6778 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 6779 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 6780 6781 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 6782 ports_offset = i * DCORE_MME_SBTE_OFFSET; 6783 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 6784 dcore_offset + ports_offset, 0); 6785 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 6786 dcore_offset + ports_offset, rw_asid); 6787 } 6788 6789 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 6790 ports_offset = i * DCORE_MME_WB_OFFSET; 6791 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 6792 dcore_offset + ports_offset, 0); 6793 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 6794 dcore_offset + ports_offset, rw_asid); 6795 } 6796 6797 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6798 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6799 6800 /* 6801 * Decoders 6802 */ 6803 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 6804 if (prop->decoder_enabled_mask & 
BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 6805 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 6806 } 6807 } 6808 6809 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 6810 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6811 { 6812 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 6813 6814 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6815 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6816 6817 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6818 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6819 6820 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6821 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6822 6823 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6824 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6825 6826 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6827 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6828 } 6829 6830 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 6831 u32 rw_asid, u32 rw_mmu_bp) 6832 { 6833 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 6834 6835 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 6836 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 6837 } 6838 6839 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 6840 { 6841 u32 reg_base, reg_offset, reg_val = 0; 6842 6843 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 6844 6845 /* Enable MMU and configure asid for all relevant ARC regions */ 6846 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 6847 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 6848 6849 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 6850 WREG32(reg_base + reg_offset, reg_val); 6851 6852 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 6853 WREG32(reg_base + reg_offset, reg_val); 6854 6855 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 6856 WREG32(reg_base + reg_offset, reg_val); 6857 6858 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 6859 WREG32(reg_base + reg_offset, reg_val); 6860 6861 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 6862 WREG32(reg_base + reg_offset, reg_val); 6863 6864 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 6865 WREG32(reg_base + reg_offset, reg_val); 6866 6867 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 6868 WREG32(reg_base + reg_offset, reg_val); 6869 6870 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 6871 WREG32(reg_base + reg_offset, reg_val); 6872 6873 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 6874 WREG32(reg_base + reg_offset, reg_val); 6875 6876 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 6877 WREG32(reg_base + reg_offset, reg_val); 6878 6879 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 6880 WREG32(reg_base + reg_offset, reg_val); 6881 } 6882 6883 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 6884 { 6885 int i; 6886 6887 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 6888 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 6889 6890 for (i = CPU_ID_SCHED_ARC0 ; i < 
NUM_OF_ARC_FARMS_ARC ; i++) 6891 gaudi2_arc_mmu_prepare(hdev, i, asid); 6892 6893 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 6894 if (!gaudi2_is_queue_enabled(hdev, i)) 6895 continue; 6896 6897 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 6898 } 6899 6900 return 0; 6901 } 6902 6903 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 6904 { 6905 struct asic_fixed_properties *prop = &hdev->asic_prop; 6906 u32 rw_asid, offset; 6907 int rc, i; 6908 6909 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 6910 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 6911 6912 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6913 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6914 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6915 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6916 6917 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6918 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6919 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6920 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6921 6922 /* ROT */ 6923 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6924 offset = i * ROT_OFFSET; 6925 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 6926 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6927 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 6928 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 6929 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 6930 } 6931 6932 /* Shared Decoders are the last bits in the decoders mask */ 6933 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 6934 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 6935 6936 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 6937 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 6938 6939 /* arc farm arc dup eng */ 6940 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 6941 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 6942 6943 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 6944 if (rc) 6945 return rc; 6946 6947 return 0; 6948 } 6949 6950 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 6951 struct iterate_module_ctx *ctx) 6952 { 6953 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 6954 6955 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 6956 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 6957 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6958 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 6959 } 6960 6961 /* zero the MMUBP and set the ASID */ 6962 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 6963 { 6964 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6965 struct gaudi2_tpc_mmu_data tpc_mmu_data; 6966 struct iterate_module_ctx tpc_iter = { 6967 .fn = &gaudi2_tpc_mmu_prepare, 6968 .data = &tpc_mmu_data, 6969 }; 6970 int rc, i; 6971 6972 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 6973 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6974 return -EINVAL; 6975 } 6976 6977 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 6978 return 0; 6979 6980 rc = gaudi2_mmu_shared_prepare(hdev, asid); 6981 if (rc) 6982 return rc; 6983 6984 /* configure DCORE MMUs */ 6985 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6986 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6987 
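	/*
	 * Added note: first program the TPC MMU registers via the TPC iterator,
	 * then walk the dcores and configure the remaining engines (EDMA, Sync
	 * Manager, MME and decoders) for this ASID.
	 */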
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
	for (i = 0 ; i < NUM_OF_DCORES ; i++)
		gaudi2_mmu_dcore_prepare(hdev, i, asid);

	return 0;
}

static inline bool is_info_event(u32 event)
{
	switch (event) {
	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:

	/* return in case of NIC status event - these events are received periodically and not as
	 * an indication of an error.
	 */
	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
		return true;
	default:
		return false;
	}
}

static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
			bool ratelimited, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (ratelimited)
		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
			gaudi2_irq_map_table[event_type].valid ?
			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
	else
		dev_err(hdev->dev, "%s: %pV\n",
			gaudi2_irq_map_table[event_type].valid ?
			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);

	va_end(args);
}

static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 memory_wrapper_idx = 0;

	ecc_address = le64_to_cpu(ecc_data->ecc_address);
	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
	memory_wrapper_idx = ecc_data->memory_wrapper_idx;

	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u.
critical %u.\n", 7044 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 7045 7046 return !!ecc_data->is_critical; 7047 } 7048 7049 /* 7050 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 7051 * 7052 * @idx: the current pi/ci value 7053 * @q_len: the queue length (power of 2) 7054 * 7055 * @return the cyclically decremented index 7056 */ 7057 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 7058 { 7059 u32 mask = q_len - 1; 7060 7061 /* 7062 * modular decrement is equivalent to adding (queue_size -1) 7063 * later we take LSBs to make sure the value is in the 7064 * range [0, queue_len - 1] 7065 */ 7066 return (idx + q_len - 1) & mask; 7067 } 7068 7069 /** 7070 * gaudi2_print_sw_config_stream_data - print SW config stream data 7071 * 7072 * @hdev: pointer to the habanalabs device structure 7073 * @stream: the QMAN's stream 7074 * @qman_base: base address of QMAN registers block 7075 */ 7076 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 7077 u32 stream, u64 qman_base) 7078 { 7079 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 7080 u32 cq_ptr_lo_off, size; 7081 7082 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 7083 7084 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 7085 stream * cq_ptr_lo_off; 7086 7087 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7088 7089 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7090 7091 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 7092 size = RREG32(cq_tsize); 7093 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 7094 stream, cq_ptr, size); 7095 } 7096 7097 /** 7098 * gaudi2_print_last_pqes_on_err - print last PQEs on error 7099 * 7100 * @hdev: pointer to the habanalabs device structure 7101 * @qid_base: first QID of the QMAN (out of 4 streams) 7102 * @stream: the QMAN's stream 7103 * @qman_base: base address of QMAN registers block 7104 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 7105 */ 7106 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 7107 u64 qman_base, bool pr_sw_conf) 7108 { 7109 u32 ci, qm_ci_stream_off; 7110 struct hl_hw_queue *q; 7111 u64 pq_ci; 7112 int i; 7113 7114 q = &hdev->kernel_queues[qid_base + stream]; 7115 7116 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 7117 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 7118 stream * qm_ci_stream_off; 7119 7120 hdev->asic_funcs->hw_queues_lock(hdev); 7121 7122 if (pr_sw_conf) 7123 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7124 7125 ci = RREG32(pq_ci); 7126 7127 /* we should start printing form ci -1 */ 7128 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7129 7130 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 7131 struct hl_bd *bd; 7132 u64 addr; 7133 u32 len; 7134 7135 bd = q->kernel_address; 7136 bd += ci; 7137 7138 len = le32_to_cpu(bd->len); 7139 /* len 0 means uninitialized entry- break */ 7140 if (!len) 7141 break; 7142 7143 addr = le64_to_cpu(bd->ptr); 7144 7145 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 7146 stream, ci, addr, len); 7147 7148 /* get previous ci, wrap if needed */ 7149 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7150 } 7151 7152 hdev->asic_funcs->hw_queues_unlock(hdev); 7153 } 7154 7155 /** 7156 * print_qman_data_on_err - extract QMAN data on 
error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 *
 * This function attempts to extract as much data as possible on a QMAN error.
 * For an upper CP, print the SW config stream data and the last 8 PQEs.
 * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
 */
static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
		return;
	}

	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);

	for (i = 0 ; i < QMAN_STREAMS ; i++)
		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
}

static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
						u64 qman_base, u32 qid_base)
{
	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);

	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS) {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
		} else {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
		}

		for (j = 0 ; j < num_error_causes ; j++)
			if (glbl_sts_val & BIT(j)) {
				gaudi2_print_event(hdev, event_type, true,
					"%s. err cause: %s", reg_desc,
					i == QMAN_STREAMS ?
					gaudi2_qman_lower_cp_error_cause[j] :
					gaudi2_qman_error_cause[j]);
				error_count++;
			}

		print_qman_data_on_err(hdev, qid_base, i, qman_base);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		goto out;

	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			gaudi2_print_event(hdev, event_type, true,
				"ARB_ERR.
err cause: %s", 7229 gaudi2_qman_arb_error_cause[j]); 7230 error_count++; 7231 } 7232 } 7233 7234 out: 7235 return error_count; 7236 } 7237 7238 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7239 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7240 enum gaudi2_engine_id id, u64 *event_mask) 7241 { 7242 u32 razwi_hi, razwi_lo, razwi_xy; 7243 u16 eng_id = id; 7244 u8 rd_wr_flag; 7245 7246 if (is_write) { 7247 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7248 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7249 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7250 rd_wr_flag = HL_RAZWI_WRITE; 7251 } else { 7252 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7253 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7254 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7255 rd_wr_flag = HL_RAZWI_READ; 7256 } 7257 7258 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7259 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7260 7261 dev_err_ratelimited(hdev->dev, 7262 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7263 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7264 } 7265 7266 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7267 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7268 enum gaudi2_engine_id id, u64 *event_mask) 7269 { 7270 u64 razwi_addr = CFG_BASE; 7271 u32 razwi_xy; 7272 u16 eng_id = id; 7273 u8 rd_wr_flag; 7274 7275 if (is_write) { 7276 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7277 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7278 rd_wr_flag = HL_RAZWI_WRITE; 7279 } else { 7280 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7281 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7282 rd_wr_flag = HL_RAZWI_READ; 7283 } 7284 7285 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7286 dev_err_ratelimited(hdev->dev, 7287 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n", 7288 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7289 razwi_xy); 7290 } 7291 7292 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7293 enum razwi_event_sources module, u8 module_idx) 7294 { 7295 switch (module) { 7296 case RAZWI_TPC: 7297 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7298 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7299 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7300 (module_idx % NUM_OF_TPC_PER_DCORE) + 7301 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7302 7303 case RAZWI_MME: 7304 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 7305 (module_idx * ENGINE_ID_DCORE_OFFSET)); 7306 7307 case RAZWI_EDMA: 7308 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7309 (module_idx % NUM_OF_EDMA_PER_DCORE)); 7310 7311 case RAZWI_PDMA: 7312 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 7313 7314 case RAZWI_NIC: 7315 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 7316 7317 case RAZWI_DEC: 7318 if (module_idx == 8) 7319 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 7320 7321 if (module_idx == 9) 7322 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 7323 ; 7324 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7325 (module_idx % NUM_OF_DEC_PER_DCORE) + 7326 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7327 7328 case RAZWI_ROT: 7329 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 7330 7331 default: 7332 return GAUDI2_ENGINE_ID_SIZE; 7333 } 7334 } 7335 7336 /* 7337 * This function handles RR(Range register) hit events. 7338 * raised be initiators not PSOC RAZWI. 7339 */ 7340 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 7341 enum razwi_event_sources module, u8 module_idx, 7342 u8 module_sub_idx, u64 *event_mask) 7343 { 7344 bool via_sft = false; 7345 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id; 7346 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr; 7347 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 7348 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 7349 char initiator_name[64]; 7350 7351 switch (module) { 7352 case RAZWI_TPC: 7353 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; 7354 7355 /* TODO : remove this check and depend only on tpc routers table 7356 * when SW-118828 is resolved 7357 */ 7358 if (!hdev->asic_prop.fw_security_enabled && 7359 ((module_idx == 0) || (module_idx == 1))) 7360 lbw_rtr_id = DCORE0_RTR0; 7361 else 7362 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; 7363 sprintf(initiator_name, "TPC_%u", module_idx); 7364 break; 7365 case RAZWI_MME: 7366 sprintf(initiator_name, "MME_%u", module_idx); 7367 switch (module_sub_idx) { 7368 case MME_WAP0: 7369 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 7370 break; 7371 case MME_WAP1: 7372 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 7373 break; 7374 case MME_WRITE: 7375 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 7376 break; 7377 case MME_READ: 7378 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 7379 break; 7380 case MME_SBTE0: 7381 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 7382 break; 7383 case MME_SBTE1: 7384 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 7385 break; 7386 case MME_SBTE2: 7387 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 7388 break; 7389 case MME_SBTE3: 7390 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 7391 break; 7392 case MME_SBTE4: 7393 hbw_rtr_id = 
gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 7394 break; 7395 default: 7396 return; 7397 } 7398 lbw_rtr_id = hbw_rtr_id; 7399 break; 7400 case RAZWI_EDMA: 7401 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx]; 7402 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE; 7403 /* SFT has separate MSTR_IF for LBW, only there we can 7404 * read the LBW razwi related registers 7405 */ 7406 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE + 7407 dcore_id * SFT_DCORE_OFFSET; 7408 via_sft = true; 7409 sprintf(initiator_name, "EDMA_%u", module_idx); 7410 break; 7411 case RAZWI_PDMA: 7412 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx]; 7413 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx]; 7414 sprintf(initiator_name, "PDMA_%u", module_idx); 7415 break; 7416 case RAZWI_NIC: 7417 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx]; 7418 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx]; 7419 sprintf(initiator_name, "NIC_%u", module_idx); 7420 break; 7421 case RAZWI_DEC: 7422 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx]; 7423 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx]; 7424 sprintf(initiator_name, "DEC_%u", module_idx); 7425 break; 7426 case RAZWI_ROT: 7427 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx]; 7428 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx]; 7429 sprintf(initiator_name, "ROT_%u", module_idx); 7430 break; 7431 default: 7432 return; 7433 } 7434 7435 /* Find router mstr_if register base */ 7436 if (!via_sft) { 7437 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE; 7438 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE; 7439 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 7440 dcore_id * DCORE_OFFSET + 7441 dcore_rtr_id * DCORE_RTR_OFFSET + 7442 RTR_MSTR_IF_OFFSET; 7443 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr + 7444 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET); 7445 } 7446 7447 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 7448 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 7449 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 7450 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7451 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7452 7453 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 7454 if (hbw_shrd_aw) { 7455 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true, 7456 initiator_name, eng_id, event_mask); 7457 7458 /* Clear event indication */ 7459 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 7460 } 7461 7462 if (hbw_shrd_ar) { 7463 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false, 7464 initiator_name, eng_id, event_mask); 7465 7466 /* Clear event indication */ 7467 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 7468 } 7469 7470 if (lbw_shrd_aw) { 7471 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true, 7472 initiator_name, eng_id, event_mask); 7473 7474 /* Clear event indication */ 7475 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 7476 } 7477 7478 if (lbw_shrd_ar) { 7479 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false, 7480 initiator_name, eng_id, event_mask); 7481 7482 /* Clear event indication */ 7483 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, 
lbw_shrd_ar); 7484 } 7485 } 7486 7487 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) 7488 { 7489 struct asic_fixed_properties *prop = &hdev->asic_prop; 7490 u8 mod_idx, sub_mod; 7491 7492 /* check all TPCs */ 7493 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 7494 if (prop->tpc_enabled_mask & BIT(mod_idx)) 7495 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL); 7496 } 7497 7498 /* check all MMEs */ 7499 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7500 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 7501 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 7502 sub_mod, NULL); 7503 7504 /* check all EDMAs */ 7505 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7506 if (prop->edma_enabled_mask & BIT(mod_idx)) 7507 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL); 7508 7509 /* check all PDMAs */ 7510 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 7511 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL); 7512 7513 /* check all NICs */ 7514 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 7515 if (hdev->nic_ports_mask & BIT(mod_idx)) 7516 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 7517 NULL); 7518 7519 /* check all DECs */ 7520 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 7521 if (prop->decoder_enabled_mask & BIT(mod_idx)) 7522 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL); 7523 7524 /* check all ROTs */ 7525 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 7526 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); 7527 } 7528 7529 static const char *gaudi2_get_initiators_name(u32 rtr_id) 7530 { 7531 switch (rtr_id) { 7532 case DCORE0_RTR0: 7533 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; 7534 case DCORE0_RTR1: 7535 return "TPC0/1"; 7536 case DCORE0_RTR2: 7537 return "TPC2/3"; 7538 case DCORE0_RTR3: 7539 return "TPC4/5"; 7540 case DCORE0_RTR4: 7541 return "MME0_SBTE0/1"; 7542 case DCORE0_RTR5: 7543 return "MME0_WAP0/SBTE2"; 7544 case DCORE0_RTR6: 7545 return "MME0_CTRL_WR/SBTE3"; 7546 case DCORE0_RTR7: 7547 return "MME0_WAP1/CTRL_RD/SBTE4"; 7548 case DCORE1_RTR0: 7549 return "MME1_WAP1/CTRL_RD/SBTE4"; 7550 case DCORE1_RTR1: 7551 return "MME1_CTRL_WR/SBTE3"; 7552 case DCORE1_RTR2: 7553 return "MME1_WAP0/SBTE2"; 7554 case DCORE1_RTR3: 7555 return "MME1_SBTE0/1"; 7556 case DCORE1_RTR4: 7557 return "TPC10/11"; 7558 case DCORE1_RTR5: 7559 return "TPC8/9"; 7560 case DCORE1_RTR6: 7561 return "TPC6/7"; 7562 case DCORE1_RTR7: 7563 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; 7564 case DCORE2_RTR0: 7565 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; 7566 case DCORE2_RTR1: 7567 return "TPC16/17"; 7568 case DCORE2_RTR2: 7569 return "TPC14/15"; 7570 case DCORE2_RTR3: 7571 return "TPC12/13"; 7572 case DCORE2_RTR4: 7573 return "MME2_SBTE0/1"; 7574 case DCORE2_RTR5: 7575 return "MME2_WAP0/SBTE2"; 7576 case DCORE2_RTR6: 7577 return "MME2_CTRL_WR/SBTE3"; 7578 case DCORE2_RTR7: 7579 return "MME2_WAP1/CTRL_RD/SBTE4"; 7580 case DCORE3_RTR0: 7581 return "MME3_WAP1/CTRL_RD/SBTE4"; 7582 case DCORE3_RTR1: 7583 return "MME3_CTRL_WR/SBTE3"; 7584 case DCORE3_RTR2: 7585 return "MME3_WAP0/SBTE2"; 7586 case DCORE3_RTR3: 7587 return "MME3_SBTE0/1"; 7588 case DCORE3_RTR4: 7589 return "TPC18/19"; 7590 case DCORE3_RTR5: 
7591 return "TPC20/21"; 7592 case DCORE3_RTR6: 7593 return "TPC22/23"; 7594 case DCORE3_RTR7: 7595 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; 7596 default: 7597 return "N/A"; 7598 } 7599 } 7600 7601 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) 7602 { 7603 switch (rtr_id) { 7604 case DCORE0_RTR0: 7605 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; 7606 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; 7607 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; 7608 engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1; 7609 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7610 engines[5] = GAUDI2_ENGINE_ID_PDMA_0; 7611 engines[6] = GAUDI2_ENGINE_ID_PDMA_1; 7612 engines[7] = GAUDI2_ENGINE_ID_PCIE; 7613 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; 7614 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; 7615 engines[10] = GAUDI2_ENGINE_ID_PSOC; 7616 return 11; 7617 7618 case DCORE0_RTR1: 7619 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; 7620 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; 7621 return 2; 7622 7623 case DCORE0_RTR2: 7624 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; 7625 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3; 7626 return 2; 7627 7628 case DCORE0_RTR3: 7629 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; 7630 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; 7631 return 2; 7632 7633 case DCORE0_RTR4: 7634 case DCORE0_RTR5: 7635 case DCORE0_RTR6: 7636 case DCORE0_RTR7: 7637 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; 7638 return 1; 7639 7640 case DCORE1_RTR0: 7641 case DCORE1_RTR1: 7642 case DCORE1_RTR2: 7643 case DCORE1_RTR3: 7644 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; 7645 return 1; 7646 7647 case DCORE1_RTR4: 7648 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; 7649 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; 7650 return 2; 7651 7652 case DCORE1_RTR5: 7653 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; 7654 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; 7655 return 2; 7656 7657 case DCORE1_RTR6: 7658 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; 7659 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; 7660 return 2; 7661 7662 case DCORE1_RTR7: 7663 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; 7664 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; 7665 engines[2] = GAUDI2_ENGINE_ID_NIC0_0; 7666 engines[3] = GAUDI2_ENGINE_ID_NIC1_0; 7667 engines[4] = GAUDI2_ENGINE_ID_NIC2_0; 7668 engines[5] = GAUDI2_ENGINE_ID_NIC3_0; 7669 engines[6] = GAUDI2_ENGINE_ID_NIC4_0; 7670 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; 7671 engines[8] = GAUDI2_ENGINE_ID_KDMA; 7672 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; 7673 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; 7674 return 11; 7675 7676 case DCORE2_RTR0: 7677 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; 7678 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1; 7679 engines[2] = GAUDI2_ENGINE_ID_NIC5_0; 7680 engines[3] = GAUDI2_ENGINE_ID_NIC6_0; 7681 engines[4] = GAUDI2_ENGINE_ID_NIC7_0; 7682 engines[5] = GAUDI2_ENGINE_ID_NIC8_0; 7683 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; 7684 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; 7685 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7686 return 9; 7687 7688 case DCORE2_RTR1: 7689 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; 7690 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; 7691 return 2; 7692 7693 case DCORE2_RTR2: 7694 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; 7695 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; 7696 return 2; 7697 7698 case DCORE2_RTR3: 7699 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; 7700 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; 7701 return 2; 7702 7703 case DCORE2_RTR4: 7704 case DCORE2_RTR5: 7705 case DCORE2_RTR6: 7706 case DCORE2_RTR7: 7707 
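		/* Added note: routers 4-7 of DCORE2 all front the dcore's single MME,
		 * so they resolve to the same engine ID.
		 */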
engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; 7708 return 1; 7709 case DCORE3_RTR0: 7710 case DCORE3_RTR1: 7711 case DCORE3_RTR2: 7712 case DCORE3_RTR3: 7713 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; 7714 return 1; 7715 case DCORE3_RTR4: 7716 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; 7717 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; 7718 return 2; 7719 case DCORE3_RTR5: 7720 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; 7721 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; 7722 return 2; 7723 case DCORE3_RTR6: 7724 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; 7725 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; 7726 return 2; 7727 case DCORE3_RTR7: 7728 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; 7729 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; 7730 engines[2] = GAUDI2_ENGINE_ID_NIC9_0; 7731 engines[3] = GAUDI2_ENGINE_ID_NIC10_0; 7732 engines[4] = GAUDI2_ENGINE_ID_NIC11_0; 7733 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; 7734 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; 7735 engines[7] = GAUDI2_ENGINE_ID_ROT_1; 7736 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7737 return 9; 7738 default: 7739 return 0; 7740 } 7741 } 7742 7743 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7744 u64 rtr_ctrl_base_addr, bool is_write, 7745 u64 *event_mask) 7746 { 7747 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7748 u32 razwi_hi, razwi_lo; 7749 u8 rd_wr_flag; 7750 7751 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7752 7753 if (is_write) { 7754 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); 7755 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); 7756 rd_wr_flag = HL_RAZWI_WRITE; 7757 7758 /* Clear set indication */ 7759 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); 7760 } else { 7761 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); 7762 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); 7763 rd_wr_flag = HL_RAZWI_READ; 7764 7765 /* Clear set indication */ 7766 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); 7767 } 7768 7769 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, 7770 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7771 dev_err_ratelimited(hdev->dev, 7772 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", 7773 is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); 7774 7775 dev_err_ratelimited(hdev->dev, 7776 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7777 } 7778 7779 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7780 u64 rtr_ctrl_base_addr, bool is_write, 7781 u64 *event_mask) 7782 { 7783 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7784 u64 razwi_addr = CFG_BASE; 7785 u8 rd_wr_flag; 7786 7787 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7788 7789 if (is_write) { 7790 razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); 7791 rd_wr_flag = HL_RAZWI_WRITE; 7792 7793 /* Clear set indication */ 7794 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); 7795 } else { 7796 razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); 7797 rd_wr_flag = HL_RAZWI_READ; 7798 7799 /* Clear set indication */ 7800 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); 7801 } 7802 7803 hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, 7804 event_mask); 7805 dev_err_ratelimited(hdev->dev, 7806 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address 0x%llX\n", 7807 is_write ? 
"WR" : "RD", rtr_id, razwi_addr); 7808 7809 dev_err_ratelimited(hdev->dev, 7810 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7811 } 7812 7813 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 7814 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 7815 { 7816 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, 7817 razwi_mask_info, razwi_intr = 0, error_count = 0; 7818 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES; 7819 u64 rtr_ctrl_base_addr; 7820 7821 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 7822 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 7823 if (!razwi_intr) 7824 return 0; 7825 } 7826 7827 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 7828 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); 7829 7830 dev_err_ratelimited(hdev->dev, 7831 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7832 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 7833 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 7834 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 7835 xy, 7836 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 7837 7838 if (xy == 0) { 7839 dev_err_ratelimited(hdev->dev, 7840 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); 7841 goto clear; 7842 } 7843 7844 /* Find router id by router coordinates */ 7845 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++) 7846 if (rtr_coordinates_to_rtr_id[rtr_id] == xy) 7847 break; 7848 7849 if (rtr_id == rtr_map_arr_len) { 7850 dev_err_ratelimited(hdev->dev, 7851 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy); 7852 goto clear; 7853 } 7854 7855 /* Find router mstr_if register base */ 7856 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7857 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7858 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET + 7859 dcore_rtr_id * DCORE_RTR_OFFSET; 7860 7861 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET); 7862 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET); 7863 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET); 7864 lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); 7865 7866 if (hbw_aw_set) 7867 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7868 rtr_ctrl_base_addr, true, event_mask); 7869 7870 if (hbw_ar_set) 7871 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7872 rtr_ctrl_base_addr, false, event_mask); 7873 7874 if (lbw_aw_set) 7875 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7876 rtr_ctrl_base_addr, true, event_mask); 7877 7878 if (lbw_ar_set) 7879 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7880 rtr_ctrl_base_addr, false, event_mask); 7881 7882 error_count++; 7883 7884 clear: 7885 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 7886 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 7887 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 7888 7889 return error_count; 7890 } 7891 7892 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 7893 { 7894 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7895 7896 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 7897 7898 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 7899 if (sts_val & BIT(i)) { 7900 
gaudi2_print_event(hdev, event_type, true, 7901 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 7902 sts_clr_val |= BIT(i); 7903 error_count++; 7904 } 7905 } 7906 7907 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 7908 7909 return error_count; 7910 } 7911 7912 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 7913 bool extended_err_check, u64 *event_mask) 7914 { 7915 enum razwi_event_sources module; 7916 u32 error_count = 0; 7917 u64 qman_base; 7918 u8 index; 7919 7920 switch (event_type) { 7921 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 7922 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 7923 qman_base = mmDCORE0_TPC0_QM_BASE + 7924 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 7925 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 7926 module = RAZWI_TPC; 7927 break; 7928 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 7929 qman_base = mmDCORE0_TPC6_QM_BASE; 7930 module = RAZWI_TPC; 7931 break; 7932 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 7933 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 7934 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 7935 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 7936 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 7937 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 7938 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 7939 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 7940 module = RAZWI_MME; 7941 break; 7942 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 7943 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 7944 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 7945 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 7946 module = RAZWI_PDMA; 7947 break; 7948 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 7949 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 7950 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 7951 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 7952 module = RAZWI_ROT; 7953 break; 7954 default: 7955 return 0; 7956 } 7957 7958 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7959 7960 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 7961 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 7962 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 7963 error_count += _gaudi2_handle_qm_sei_err(hdev, 7964 qman_base + NIC_QM_OFFSET, event_type); 7965 7966 if (extended_err_check) { 7967 /* check if RAZWI happened */ 7968 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask); 7969 hl_check_for_glbl_errors(hdev); 7970 } 7971 7972 return error_count; 7973 } 7974 7975 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7976 { 7977 u32 qid_base, error_count = 0; 7978 u64 qman_base; 7979 u8 index; 7980 7981 switch (event_type) { 7982 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 7983 index = event_type - GAUDI2_EVENT_TPC0_QM; 7984 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 7985 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7986 break; 7987 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 7988 index = event_type - GAUDI2_EVENT_TPC6_QM; 7989 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 7990 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7991 break; 7992 case GAUDI2_EVENT_TPC12_QM ... 
GAUDI2_EVENT_TPC17_QM: 7993 index = event_type - GAUDI2_EVENT_TPC12_QM; 7994 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 7995 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7996 break; 7997 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM: 7998 index = event_type - GAUDI2_EVENT_TPC18_QM; 7999 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 8000 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8001 break; 8002 case GAUDI2_EVENT_TPC24_QM: 8003 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 8004 qman_base = mmDCORE0_TPC6_QM_BASE; 8005 break; 8006 case GAUDI2_EVENT_MME0_QM: 8007 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 8008 qman_base = mmDCORE0_MME_QM_BASE; 8009 break; 8010 case GAUDI2_EVENT_MME1_QM: 8011 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 8012 qman_base = mmDCORE1_MME_QM_BASE; 8013 break; 8014 case GAUDI2_EVENT_MME2_QM: 8015 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 8016 qman_base = mmDCORE2_MME_QM_BASE; 8017 break; 8018 case GAUDI2_EVENT_MME3_QM: 8019 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 8020 qman_base = mmDCORE3_MME_QM_BASE; 8021 break; 8022 case GAUDI2_EVENT_HDMA0_QM: 8023 index = 0; 8024 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 8025 qman_base = mmDCORE0_EDMA0_QM_BASE; 8026 break; 8027 case GAUDI2_EVENT_HDMA1_QM: 8028 index = 1; 8029 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 8030 qman_base = mmDCORE0_EDMA1_QM_BASE; 8031 break; 8032 case GAUDI2_EVENT_HDMA2_QM: 8033 index = 2; 8034 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 8035 qman_base = mmDCORE1_EDMA0_QM_BASE; 8036 break; 8037 case GAUDI2_EVENT_HDMA3_QM: 8038 index = 3; 8039 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 8040 qman_base = mmDCORE1_EDMA1_QM_BASE; 8041 break; 8042 case GAUDI2_EVENT_HDMA4_QM: 8043 index = 4; 8044 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 8045 qman_base = mmDCORE2_EDMA0_QM_BASE; 8046 break; 8047 case GAUDI2_EVENT_HDMA5_QM: 8048 index = 5; 8049 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 8050 qman_base = mmDCORE2_EDMA1_QM_BASE; 8051 break; 8052 case GAUDI2_EVENT_HDMA6_QM: 8053 index = 6; 8054 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 8055 qman_base = mmDCORE3_EDMA0_QM_BASE; 8056 break; 8057 case GAUDI2_EVENT_HDMA7_QM: 8058 index = 7; 8059 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 8060 qman_base = mmDCORE3_EDMA1_QM_BASE; 8061 break; 8062 case GAUDI2_EVENT_PDMA0_QM: 8063 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 8064 qman_base = mmPDMA0_QM_BASE; 8065 break; 8066 case GAUDI2_EVENT_PDMA1_QM: 8067 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 8068 qman_base = mmPDMA1_QM_BASE; 8069 break; 8070 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 8071 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 8072 qman_base = mmROT0_QM_BASE; 8073 break; 8074 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8075 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 8076 qman_base = mmROT1_QM_BASE; 8077 break; 8078 default: 8079 return 0; 8080 } 8081 8082 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 8083 8084 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 8085 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) { 8086 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 8087 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask); 8088 } 8089 8090 hl_check_for_glbl_errors(hdev); 8091 8092 return error_count; 8093 } 8094 8095 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 8096 { 8097 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 
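	/*
	 * Descriptive note: this handler follows the cause-register pattern used by
	 * most error handlers in this file - read the sticky status register, walk
	 * the known cause bits, log every set bit by name, accumulate those bits
	 * into a clear mask and write it back to the clear register. A minimal
	 * sketch of the pattern, using hypothetical register/array names:
	 *
	 *	sts = RREG32(STS_REG);
	 *	for (i = 0 ; i < NUM_OF_CAUSES ; i++)
	 *		if (sts & BIT(i)) {
	 *			log("err cause: %s", cause_name[i]);
	 *			clr |= BIT(i);
	 *			error_count++;
	 *		}
	 *	WREG32(CLR_REG, clr);
	 */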
8098 8099 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); 8100 8101 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 8102 if (sts_val & BIT(i)) { 8103 gaudi2_print_event(hdev, event_type, true, 8104 "err cause: %s", gaudi2_arc_sei_error_cause[i]); 8105 sts_clr_val |= BIT(i); 8106 error_count++; 8107 } 8108 } 8109 8110 hl_check_for_glbl_errors(hdev); 8111 8112 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); 8113 8114 return error_count; 8115 } 8116 8117 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 8118 { 8119 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8120 8121 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 8122 8123 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 8124 if (sts_val & BIT(i)) { 8125 gaudi2_print_event(hdev, event_type, true, 8126 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 8127 sts_clr_val |= BIT(i); 8128 error_count++; 8129 } 8130 } 8131 8132 hl_check_for_glbl_errors(hdev); 8133 8134 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 8135 8136 return error_count; 8137 } 8138 8139 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 8140 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8141 u64 *event_mask) 8142 { 8143 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8144 u32 error_count = 0; 8145 int i; 8146 8147 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 8148 if (intr_cause_data & BIT(i)) { 8149 gaudi2_print_event(hdev, event_type, true, 8150 "err cause: %s", guadi2_rot_error_cause[i]); 8151 error_count++; 8152 } 8153 8154 /* check if RAZWI happened */ 8155 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask); 8156 hl_check_for_glbl_errors(hdev); 8157 8158 return error_count; 8159 } 8160 8161 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 8162 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8163 u64 *event_mask) 8164 { 8165 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8166 u32 error_count = 0; 8167 int i; 8168 8169 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 8170 if (intr_cause_data & BIT(i)) { 8171 gaudi2_print_event(hdev, event_type, true, 8172 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 8173 error_count++; 8174 } 8175 8176 /* check if RAZWI happened */ 8177 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask); 8178 hl_check_for_glbl_errors(hdev); 8179 8180 return error_count; 8181 } 8182 8183 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8184 u64 *event_mask) 8185 { 8186 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8187 int i; 8188 8189 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8190 /* DCORE DEC */ 8191 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8192 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8193 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8194 else 8195 /* PCIE DEC */ 8196 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8197 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8198 8199 sts_val = RREG32(sts_addr); 8200 8201 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8202 if (sts_val & BIT(i)) { 8203 gaudi2_print_event(hdev, event_type, true, 8204 "err cause: %s", gaudi2_dec_error_cause[i]); 8205 sts_clr_val |= BIT(i); 8206 error_count++; 8207 } 8208 } 8209 8210 /* check if RAZWI happened */ 8211 
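	/*
	 * A decoder error may also have latched a RAZWI indication (an illegal
	 * transaction answered as read-as-zero / write-ignored), so the shared
	 * per-module RAZWI handler is queried for this DEC instance before the
	 * cause bits are cleared below.
	 */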
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	/* Write 1 to clear errors */
	WREG32(sts_addr, sts_clr_val);

	return error_count;
}

static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
	int i;

	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened */
	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);

	hl_check_for_glbl_errors(hdev);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}

static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	int i, error_count = 0;

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
			error_count++;
		}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}

static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
	int i;

	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_wap_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened on WAP0/1 */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
	hl_check_for_glbl_errors(hdev);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}

static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	/* If an AXI read or write error is received, an error is reported and an
	 * interrupt message is sent. Due to an HW erratum, when reading the cause
	 * register of the KDMA engine, the reported error is always HBW even if
	 * the actual error was caused by an LBW KDMA transaction.
8308 */ 8309 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8310 if (intr_cause_data & BIT(i)) { 8311 gaudi2_print_event(hdev, event_type, true, 8312 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]); 8313 error_count++; 8314 } 8315 8316 hl_check_for_glbl_errors(hdev); 8317 8318 return error_count; 8319 } 8320 8321 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, 8322 u64 intr_cause_data) 8323 { 8324 u32 error_count = 0; 8325 int i; 8326 8327 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8328 if (intr_cause_data & BIT(i)) { 8329 gaudi2_print_event(hdev, event_type, true, 8330 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); 8331 error_count++; 8332 } 8333 8334 hl_check_for_glbl_errors(hdev); 8335 8336 return error_count; 8337 } 8338 8339 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) 8340 { 8341 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 8342 8343 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 8344 if (RREG32(razwi_happened_addr)) { 8345 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8346 GAUDI2_ENGINE_ID_PCIE, event_mask); 8347 WREG32(razwi_happened_addr, 0x1); 8348 } 8349 8350 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 8351 if (RREG32(razwi_happened_addr)) { 8352 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8353 GAUDI2_ENGINE_ID_PCIE, event_mask); 8354 WREG32(razwi_happened_addr, 0x1); 8355 } 8356 8357 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 8358 if (RREG32(razwi_happened_addr)) { 8359 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8360 GAUDI2_ENGINE_ID_PCIE, event_mask); 8361 WREG32(razwi_happened_addr, 0x1); 8362 } 8363 8364 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 8365 if (RREG32(razwi_happened_addr)) { 8366 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8367 GAUDI2_ENGINE_ID_PCIE, event_mask); 8368 WREG32(razwi_happened_addr, 0x1); 8369 } 8370 } 8371 8372 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type, 8373 u64 intr_cause_data, u64 *event_mask) 8374 { 8375 u32 error_count = 0; 8376 int i; 8377 8378 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 8379 if (!(intr_cause_data & BIT_ULL(i))) 8380 continue; 8381 8382 gaudi2_print_event(hdev, event_type, true, 8383 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); 8384 error_count++; 8385 8386 switch (intr_cause_data & BIT_ULL(i)) { 8387 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: 8388 hl_check_for_glbl_errors(hdev); 8389 break; 8390 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: 8391 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); 8392 break; 8393 } 8394 } 8395 8396 return error_count; 8397 } 8398 8399 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type, 8400 u64 intr_cause_data) 8401 8402 { 8403 u32 error_count = 0; 8404 int i; 8405 8406 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) { 8407 if (intr_cause_data & BIT_ULL(i)) { 8408 gaudi2_print_event(hdev, event_type, true, 8409 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]); 8410 error_count++; 8411 } 8412 } 8413 8414 return error_count; 8415 } 8416 8417 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 
8418 { 8419 u32 error_count = 0; 8420 int i; 8421 8422 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8423 if (intr_cause_data & BIT_ULL(i)) { 8424 gaudi2_print_event(hdev, event_type, true, 8425 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8426 error_count++; 8427 } 8428 } 8429 8430 return error_count; 8431 } 8432 8433 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8434 u64 *event_mask) 8435 { 8436 u32 valid, val, axid_l, axid_h; 8437 u64 addr; 8438 8439 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8440 8441 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8442 return; 8443 8444 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8445 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8446 addr <<= 32; 8447 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8448 8449 axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB)); 8450 axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB)); 8451 8452 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n", 8453 is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l); 8454 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8455 8456 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); 8457 } 8458 8459 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8460 { 8461 u32 valid, val; 8462 u64 addr; 8463 8464 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8465 8466 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8467 return; 8468 8469 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8470 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8471 addr <<= 32; 8472 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8473 8474 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8475 is_pmmu ? 
"PMMU" : "HMMU", addr); 8476 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); 8477 } 8478 8479 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8480 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8481 { 8482 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8483 int i; 8484 8485 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8486 8487 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8488 if (spi_sei_cause & BIT(i)) { 8489 gaudi2_print_event(hdev, event_type, true, 8490 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8491 8492 if (i == 0) 8493 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8494 else if (i == 1) 8495 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8496 8497 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8498 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8499 8500 error_count++; 8501 } 8502 } 8503 8504 /* Clear cause */ 8505 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8506 8507 /* Clear interrupt */ 8508 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8509 8510 return error_count; 8511 } 8512 8513 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 8514 { 8515 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 8516 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 8517 int i; 8518 8519 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 8520 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 8521 8522 sei_cause_val = RREG32(sei_cause_addr); 8523 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 8524 cq_intr_val = RREG32(cq_intr_addr); 8525 8526 /* SEI interrupt */ 8527 if (sei_cause_cause) { 8528 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 8529 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 8530 sei_cause_val); 8531 8532 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 8533 if (!(sei_cause_cause & BIT(i))) 8534 continue; 8535 8536 gaudi2_print_event(hdev, event_type, true, 8537 "err cause: %s. %s: 0x%X\n", 8538 gaudi2_sm_sei_cause[i].cause_name, 8539 gaudi2_sm_sei_cause[i].log_name, 8540 sei_cause_log); 8541 error_count++; 8542 break; 8543 } 8544 8545 /* Clear SM_SEI_CAUSE */ 8546 WREG32(sei_cause_addr, 0); 8547 } 8548 8549 /* CQ interrupt */ 8550 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 8551 cq_intr_queue_index = 8552 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 8553 cq_intr_val); 8554 8555 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 8556 sm_index, cq_intr_queue_index); 8557 error_count++; 8558 8559 /* Clear CQ_INTR */ 8560 WREG32(cq_intr_addr, 0); 8561 } 8562 8563 hl_check_for_glbl_errors(hdev); 8564 8565 return error_count; 8566 } 8567 8568 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8569 { 8570 bool is_pmmu = false; 8571 u32 error_count = 0; 8572 u64 mmu_base; 8573 u8 index; 8574 8575 switch (event_type) { 8576 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 8577 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; 8578 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8579 break; 8580 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 8581 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); 8582 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8583 break; 8584 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 8585 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; 8586 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8587 break; 8588 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 8589 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); 8590 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8591 break; 8592 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 8593 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; 8594 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8595 break; 8596 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 8597 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); 8598 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8599 break; 8600 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 8601 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; 8602 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8603 break; 8604 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 8605 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); 8606 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8607 break; 8608 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 8609 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8610 is_pmmu = true; 8611 mmu_base = mmPMMU_HBW_MMU_BASE; 8612 break; 8613 default: 8614 return 0; 8615 } 8616 8617 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 8618 is_pmmu, event_mask); 8619 hl_check_for_glbl_errors(hdev); 8620 8621 return error_count; 8622 } 8623 8624 8625 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 8626 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 8627 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 8628 { 8629 u32 addr, beat, beat_shift; 8630 bool rc = false; 8631 8632 dev_err_ratelimited(hdev->dev, 8633 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 8634 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 8635 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 8636 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 8637 8638 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 8639 dev_err_ratelimited(hdev->dev, 8640 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 8641 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 8642 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 8643 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 8644 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 8645 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 8646 8647 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 8648 for (beat = 0 ; beat < 4 ; beat++) { 8649 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8650 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 8651 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 8652 beat, 8653 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8654 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 8655 8656 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8657 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 8658 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, 
Syndrome: %#x\n",
				beat,
				le32_to_cpu(rd_err_data->dbg_rd_err_dm),
				le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
			rc |= true;
		}

		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
			dev_err_ratelimited(hdev->dev,
				"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
				beat,
				le32_to_cpu(rd_err_data->dbg_rd_err_dm),
				(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
					(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
					(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
			rc |= true;
		}

		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
	}

	return rc;
}

static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
				derr & 0x3, derr & 0xc);

	/* JIRA H6-3286 - the following prints may not be valid */
	dev_err_ratelimited(hdev->dev, "Last latched write command addresses:\n");
	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
		dev_err_ratelimited(hdev->dev,
			"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
			i,
			FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
			FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
			FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
			FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
	}
}

static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
	__le32 *col_cmd = ca_par_err_data->dbg_col;
	__le16 *row_cmd = ca_par_err_data->dbg_row;
	u32 i;

	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
}

/* Returns true if hard reset is needed or false otherwise */
static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
					struct hl_eq_hbm_sei_data *sei_data)
{
	bool require_hard_reset = false;
	u32 hbm_id, mc_id, cause_idx;

	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;

	cause_idx = sei_data->hdr.sei_cause;
	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
		gaudi2_print_event(hdev, event_type, true,
			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
		return true;
	}

	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
		"System %s Error Interrupt - HBM(%u) MC(%u)
MC_CH(%u) MC_PC(%u). Error cause: %s\n", 8749 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 8750 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8751 hbm_mc_sei_cause[cause_idx]); 8752 8753 /* Print error-specific info */ 8754 switch (cause_idx) { 8755 case HBM_SEI_CATTRIP: 8756 require_hard_reset = true; 8757 break; 8758 8759 case HBM_SEI_CMD_PARITY_EVEN: 8760 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 8761 le32_to_cpu(sei_data->hdr.cnt)); 8762 require_hard_reset = true; 8763 break; 8764 8765 case HBM_SEI_CMD_PARITY_ODD: 8766 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 8767 le32_to_cpu(sei_data->hdr.cnt)); 8768 require_hard_reset = true; 8769 break; 8770 8771 case HBM_SEI_WRITE_DATA_PARITY_ERR: 8772 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 8773 le32_to_cpu(sei_data->hdr.cnt)); 8774 require_hard_reset = true; 8775 break; 8776 8777 case HBM_SEI_READ_ERR: 8778 /* Unlike other SEI events, read error requires further processing of the 8779 * raw data in order to determine the root cause. 8780 */ 8781 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 8782 &sei_data->read_err_info, 8783 le32_to_cpu(sei_data->hdr.cnt)); 8784 break; 8785 8786 default: 8787 break; 8788 } 8789 8790 require_hard_reset |= !!sei_data->hdr.is_critical; 8791 8792 return require_hard_reset; 8793 } 8794 8795 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 8796 u64 intr_cause_data) 8797 { 8798 if (intr_cause_data) { 8799 gaudi2_print_event(hdev, event_type, true, 8800 "temperature error cause: %#llx", intr_cause_data); 8801 return 1; 8802 } 8803 8804 return 0; 8805 } 8806 8807 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 8808 { 8809 u32 i, error_count = 0; 8810 8811 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 8812 if (intr_cause_data & hbm_mc_spi[i].mask) { 8813 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 8814 hbm_mc_spi[i].cause); 8815 error_count++; 8816 } 8817 8818 return error_count; 8819 } 8820 8821 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8822 { 8823 ktime_t zero_time = ktime_set(0, 0); 8824 8825 mutex_lock(&hdev->clk_throttling.lock); 8826 8827 switch (event_type) { 8828 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 8829 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 8830 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 8831 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 8832 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 8833 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 8834 break; 8835 8836 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 8837 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 8838 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 8839 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 8840 break; 8841 8842 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 8843 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 8844 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 8845 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 8846 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 8847 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8848 dev_info_ratelimited(hdev->dev, "Clock 
throttling due to overheating\n"); 8849 break; 8850 8851 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 8852 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 8853 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 8854 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8855 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 8856 break; 8857 8858 default: 8859 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 8860 break; 8861 } 8862 8863 mutex_unlock(&hdev->clk_throttling.lock); 8864 } 8865 8866 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 8867 struct cpucp_pkt_sync_err *sync_err) 8868 { 8869 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8870 8871 gaudi2_print_event(hdev, event_type, false, 8872 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8873 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 8874 q->pi, atomic_read(&q->ci)); 8875 } 8876 8877 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 8878 { 8879 u32 p2p_intr, msix_gw_intr, error_count = 0; 8880 8881 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 8882 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 8883 8884 if (p2p_intr) { 8885 gaudi2_print_event(hdev, event_type, true, 8886 "pcie p2p transaction terminated due to security, req_id(0x%x)\n", 8887 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 8888 8889 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 8890 error_count++; 8891 } 8892 8893 if (msix_gw_intr) { 8894 gaudi2_print_event(hdev, event_type, true, 8895 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n", 8896 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 8897 8898 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 8899 error_count++; 8900 } 8901 8902 return error_count; 8903 } 8904 8905 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 8906 struct hl_eq_pcie_drain_ind_data *drain_data) 8907 { 8908 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 8909 8910 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 8911 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 8912 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 8913 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 8914 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 8915 8916 if (cause & BIT_ULL(0)) { 8917 dev_err_ratelimited(hdev->dev, 8918 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 8919 !!lbw_rd, !!lbw_wr); 8920 error_count++; 8921 } 8922 8923 if (cause & BIT_ULL(1)) { 8924 dev_err_ratelimited(hdev->dev, 8925 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 8926 hbw_rd, hbw_wr); 8927 error_count++; 8928 } 8929 8930 return error_count; 8931 } 8932 8933 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 8934 { 8935 u32 error_count = 0; 8936 int i; 8937 8938 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 8939 if (intr_cause_data & BIT_ULL(i)) { 8940 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 8941 gaudi2_psoc_axi_drain_interrupts_cause[i]); 8942 error_count++; 8943 } 8944 } 8945 8946 hl_check_for_glbl_errors(hdev); 8947 8948 return error_count; 8949 } 8950 8951 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 8952 struct cpucp_pkt_sync_err *sync_err) 8953 { 8954 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8955 8956 gaudi2_print_event(hdev, event_type, false, 8957 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8958 
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 8959 } 8960 8961 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 8962 struct hl_eq_engine_arc_intr_data *data) 8963 { 8964 struct hl_engine_arc_dccm_queue_full_irq *q; 8965 u32 intr_type, engine_id; 8966 u64 payload; 8967 8968 intr_type = le32_to_cpu(data->intr_type); 8969 engine_id = le32_to_cpu(data->engine_id); 8970 payload = le64_to_cpu(data->payload); 8971 8972 switch (intr_type) { 8973 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 8974 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 8975 8976 gaudi2_print_event(hdev, event_type, true, 8977 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n", 8978 engine_id, intr_type, q->queue_index); 8979 return 1; 8980 default: 8981 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n"); 8982 return 0; 8983 } 8984 } 8985 8986 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 8987 { 8988 struct gaudi2_device *gaudi2 = hdev->asic_specific; 8989 bool reset_required = false, is_critical = false; 8990 u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0; 8991 u64 event_mask = 0; 8992 u16 event_type; 8993 8994 ctl = le32_to_cpu(eq_entry->hdr.ctl); 8995 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 8996 8997 if (event_type >= GAUDI2_EVENT_SIZE) { 8998 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 8999 event_type, GAUDI2_EVENT_SIZE - 1); 9000 return; 9001 } 9002 9003 gaudi2->events_stat[event_type]++; 9004 gaudi2->events_stat_aggregate[event_type]++; 9005 9006 switch (event_type) { 9007 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 9008 fallthrough; 9009 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 9010 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9011 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9012 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 9013 is_critical = eq_entry->ecc_data.is_critical; 9014 error_count++; 9015 break; 9016 9017 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 9018 fallthrough; 9019 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 9020 fallthrough; 9021 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 9022 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask); 9023 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9024 break; 9025 9026 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 9027 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9028 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 9029 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9030 break; 9031 9032 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 9033 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 9034 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9035 break; 9036 9037 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 9038 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 9039 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9040 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask); 9041 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9042 break; 9043 9044 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 9045 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 9046 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 9047 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 9048 &eq_entry->razwi_with_intr_cause, &event_mask); 9049 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9050 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9051 break; 9052 9053 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 9054 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 9055 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 9056 &eq_entry->razwi_with_intr_cause, &event_mask); 9057 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9058 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9059 break; 9060 9061 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 9062 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 9063 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 9064 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9065 break; 9066 9067 case GAUDI2_EVENT_TPC0_KERNEL_ERR: 9068 case GAUDI2_EVENT_TPC1_KERNEL_ERR: 9069 case GAUDI2_EVENT_TPC2_KERNEL_ERR: 9070 case GAUDI2_EVENT_TPC3_KERNEL_ERR: 9071 case GAUDI2_EVENT_TPC4_KERNEL_ERR: 9072 case GAUDI2_EVENT_TPC5_KERNEL_ERR: 9073 case GAUDI2_EVENT_TPC6_KERNEL_ERR: 9074 case GAUDI2_EVENT_TPC7_KERNEL_ERR: 9075 case GAUDI2_EVENT_TPC8_KERNEL_ERR: 9076 case GAUDI2_EVENT_TPC9_KERNEL_ERR: 9077 case GAUDI2_EVENT_TPC10_KERNEL_ERR: 9078 case GAUDI2_EVENT_TPC11_KERNEL_ERR: 9079 case GAUDI2_EVENT_TPC12_KERNEL_ERR: 9080 case GAUDI2_EVENT_TPC13_KERNEL_ERR: 9081 case GAUDI2_EVENT_TPC14_KERNEL_ERR: 9082 case GAUDI2_EVENT_TPC15_KERNEL_ERR: 9083 case GAUDI2_EVENT_TPC16_KERNEL_ERR: 9084 case GAUDI2_EVENT_TPC17_KERNEL_ERR: 9085 case GAUDI2_EVENT_TPC18_KERNEL_ERR: 9086 case GAUDI2_EVENT_TPC19_KERNEL_ERR: 9087 case GAUDI2_EVENT_TPC20_KERNEL_ERR: 9088 case GAUDI2_EVENT_TPC21_KERNEL_ERR: 9089 case GAUDI2_EVENT_TPC22_KERNEL_ERR: 9090 case GAUDI2_EVENT_TPC23_KERNEL_ERR: 9091 case GAUDI2_EVENT_TPC24_KERNEL_ERR: 9092 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 9093 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 9094 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 9095 &eq_entry->razwi_with_intr_cause, &event_mask); 9096 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9097 break; 9098 9099 case GAUDI2_EVENT_DEC0_SPI: 9100 case GAUDI2_EVENT_DEC1_SPI: 9101 case GAUDI2_EVENT_DEC2_SPI: 9102 case GAUDI2_EVENT_DEC3_SPI: 9103 case GAUDI2_EVENT_DEC4_SPI: 9104 case GAUDI2_EVENT_DEC5_SPI: 
9105 case GAUDI2_EVENT_DEC6_SPI: 9106 case GAUDI2_EVENT_DEC7_SPI: 9107 case GAUDI2_EVENT_DEC8_SPI: 9108 case GAUDI2_EVENT_DEC9_SPI: 9109 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 9110 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 9111 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 9112 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9113 break; 9114 9115 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 9116 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 9117 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 9118 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 9119 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 9120 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 9121 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 9122 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 9123 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9124 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9125 break; 9126 9127 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: 9128 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR: 9129 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR: 9130 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR: 9131 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / 9132 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 9133 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 9134 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 9135 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9136 break; 9137 9138 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: 9139 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: 9140 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: 9141 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: 9142 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / 9143 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 9144 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 9145 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask); 9146 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9147 break; 9148 9149 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 9150 case GAUDI2_EVENT_KDMA0_CORE: 9151 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 9152 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9153 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9154 break; 9155 9156 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: 9157 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 9158 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9159 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9160 break; 9161 9162 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 9163 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 9164 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 9165 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9166 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9167 break; 9168 9169 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9170 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9171 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9172 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9173 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 9174 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9175 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9176 break; 9177 9178 case GAUDI2_EVENT_HIF0_FATAL ... 
GAUDI2_EVENT_HIF12_FATAL: 9179 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 9180 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9181 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9182 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9183 break; 9184 9185 case GAUDI2_EVENT_PMMU_FATAL_0: 9186 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 9187 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9188 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9189 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9190 break; 9191 9192 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 9193 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 9194 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9195 break; 9196 9197 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9198 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9199 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9200 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9201 reset_required = true; 9202 } 9203 error_count++; 9204 break; 9205 9206 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9207 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9208 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9209 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9210 break; 9211 9212 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: 9213 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9214 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9215 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9216 break; 9217 9218 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9219 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9220 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9221 break; 9222 9223 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9224 error_count = gaudi2_handle_psoc_drain(hdev, 9225 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9226 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9227 break; 9228 9229 case GAUDI2_EVENT_CPU_AXI_ECC: 9230 error_count = GAUDI2_NA_EVENT_CAUSE; 9231 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9232 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9233 break; 9234 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9235 error_count = GAUDI2_NA_EVENT_CAUSE; 9236 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9237 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9238 break; 9239 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9240 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9241 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9242 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9243 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9244 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9245 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9246 break; 9247 case GAUDI2_EVENT_VM0_ALARM_A ... 
GAUDI2_EVENT_VM3_ALARM_B: 9248 error_count = GAUDI2_NA_EVENT_CAUSE; 9249 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9250 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9251 break; 9252 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9253 error_count = GAUDI2_NA_EVENT_CAUSE; 9254 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9255 break; 9256 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9257 error_count = GAUDI2_NA_EVENT_CAUSE; 9258 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9259 break; 9260 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9261 error_count = GAUDI2_NA_EVENT_CAUSE; 9262 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9263 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9264 break; 9265 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9266 error_count = GAUDI2_NA_EVENT_CAUSE; 9267 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9268 break; 9269 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9270 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9271 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9272 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9273 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9274 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9275 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9276 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9277 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9278 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9279 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9280 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9281 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9282 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9283 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9284 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9285 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9286 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9287 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9288 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9289 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9290 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9291 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9292 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9293 case GAUDI2_EVENT_TPC24_BMON_SPMU: 9294 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9295 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9296 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9297 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9298 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9299 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9300 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9301 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9302 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9303 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9304 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9305 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9306 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9307 fallthrough; 9308 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9309 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9310 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9311 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9312 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9313 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9314 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9315 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9316 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9317 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9318 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... 
GAUDI2_EVENT_SM3_BMON_SPMU: 9319 error_count = GAUDI2_NA_EVENT_CAUSE; 9320 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9321 break; 9322 9323 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9324 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9325 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9326 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9327 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9328 error_count = GAUDI2_NA_EVENT_CAUSE; 9329 break; 9330 9331 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9332 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9333 error_count = GAUDI2_NA_EVENT_CAUSE; 9334 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9335 break; 9336 9337 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9338 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9339 error_count = GAUDI2_NA_EVENT_CAUSE; 9340 /* Do nothing- FW will handle it */ 9341 break; 9342 9343 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9344 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9345 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9346 break; 9347 9348 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9349 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9350 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9351 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9352 break; 9353 9354 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9355 error_count = GAUDI2_NA_EVENT_CAUSE; 9356 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9357 break; 9358 9359 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9360 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9361 le64_to_cpu(eq_entry->data[0])); 9362 error_count = GAUDI2_NA_EVENT_CAUSE; 9363 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9364 break; 9365 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9366 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9367 le64_to_cpu(eq_entry->data[0])); 9368 error_count = GAUDI2_NA_EVENT_CAUSE; 9369 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9370 break; 9371 9372 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9373 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9374 error_count = GAUDI2_NA_EVENT_CAUSE; 9375 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9376 break; 9377 9378 case GAUDI2_EVENT_ARC_DCCM_FULL: 9379 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9380 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9381 break; 9382 9383 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9384 case GAUDI2_EVENT_DEV_RESET_REQ: 9385 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9386 error_count = GAUDI2_NA_EVENT_CAUSE; 9387 is_critical = true; 9388 break; 9389 9390 default: 9391 if (gaudi2_irq_map_table[event_type].valid) { 9392 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9393 event_type); 9394 error_count = GAUDI2_NA_EVENT_CAUSE; 9395 } 9396 } 9397 9398 /* Make sure to dump an error in case no error cause was printed so far. 9399 * Note that although we have counted the errors, we use this number as 9400 * a boolean. 
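	 * For events that have no per-cause breakdown, error_count is set to
	 * GAUDI2_NA_EVENT_CAUSE up front; such events dump only the raw event ID
	 * below, and even that is skipped when is_info_event() classifies the
	 * event as purely informational.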
9401 */ 9402 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type)) 9403 gaudi2_print_event(hdev, event_type, true, "%d", event_type); 9404 else if (error_count == 0) 9405 gaudi2_print_event(hdev, event_type, true, 9406 "No error cause for H/W event %u\n", event_type); 9407 9408 if ((gaudi2_irq_map_table[event_type].reset || reset_required) && 9409 (hdev->hard_reset_on_fw_events || 9410 (hdev->asic_prop.fw_security_enabled && is_critical))) 9411 goto reset_device; 9412 9413 /* Send unmask irq only for interrupts not classified as MSG */ 9414 if (!gaudi2_irq_map_table[event_type].msg) 9415 hl_fw_unmask_irq(hdev, event_type); 9416 9417 if (event_mask) 9418 hl_notifier_event_send_all(hdev, event_mask); 9419 9420 return; 9421 9422 reset_device: 9423 if (hdev->asic_prop.fw_security_enabled && is_critical) { 9424 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW; 9425 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 9426 } else { 9427 reset_flags |= HL_DRV_RESET_DELAY; 9428 } 9429 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 9430 hl_device_cond_reset(hdev, reset_flags, event_mask); 9431 } 9432 9433 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, 9434 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, 9435 u32 hw_queue_id, u32 size, u64 addr, u32 val) 9436 { 9437 u32 ctl, pkt_size; 9438 int rc = 0; 9439 9440 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 9441 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 9442 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1); 9443 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1); 9444 9445 lin_dma_pkt->ctl = cpu_to_le32(ctl); 9446 lin_dma_pkt->src_addr = cpu_to_le64(val); 9447 lin_dma_pkt->dst_addr = cpu_to_le64(addr); 9448 lin_dma_pkt->tsize = cpu_to_le32(size); 9449 9450 pkt_size = sizeof(struct packet_lin_dma); 9451 9452 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 9453 if (rc) 9454 dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n", 9455 hw_queue_id); 9456 9457 return rc; 9458 } 9459 9460 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) 9461 { 9462 u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 9463 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 9464 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 9465 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0}; 9466 u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val, 9467 old_mmubp, mmubp, num_of_pkts, busy, pkt_size; 9468 u64 comp_addr, cur_addr = addr, end_addr = addr + size; 9469 struct asic_fixed_properties *prop = &hdev->asic_prop; 9470 void *lin_dma_pkts_arr; 9471 dma_addr_t pkt_dma_addr; 9472 int rc = 0, dma_num = 0; 9473 9474 if (prop->edma_enabled_mask == 0) { 9475 dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n"); 9476 return -EIO; 9477 } 9478 9479 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 9480 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 9481 comp_addr = CFG_BASE + sob_addr; 9482 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 9483 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 9484 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) | 9485 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1); 9486 9487 /* Calculate how many lin dma pkts we'll need */ 9488 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G); 9489 pkt_size = sizeof(struct packet_lin_dma); 9490 9491 lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts, 9492 
&pkt_dma_addr, GFP_KERNEL); 9493 if (!lin_dma_pkts_arr) 9494 return -ENOMEM; 9495 9496 /* 9497 * set mmu bypass for the scrubbing - all ddmas are configured the same so save 9498 * only the first one to restore later 9499 * also set the sob addr for all edma cores for completion. 9500 * set QM as trusted to allow it to access physical address with MMU bp. 9501 */ 9502 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 9503 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9504 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9505 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9506 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9507 9508 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9509 continue; 9510 9511 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 9512 edma_offset, mmubp); 9513 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 9514 lower_32_bits(comp_addr)); 9515 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 9516 upper_32_bits(comp_addr)); 9517 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 9518 comp_val); 9519 gaudi2_qman_set_test_mode(hdev, 9520 edma_queues_id[dcore] + 4 * edma_idx, true); 9521 } 9522 } 9523 9524 WREG32(sob_addr, 0); 9525 9526 while (cur_addr < end_addr) { 9527 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9528 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9529 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9530 9531 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9532 continue; 9533 9534 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 9535 9536 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 9537 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 9538 pkt_dma_addr + dma_num * pkt_size, 9539 edma_queues_id[dcore] + edma_idx * 4, 9540 chunk_size, cur_addr, val); 9541 if (rc) 9542 goto end; 9543 9544 dma_num++; 9545 cur_addr += chunk_size; 9546 if (cur_addr == end_addr) 9547 break; 9548 } 9549 } 9550 } 9551 9552 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 9553 if (rc) { 9554 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 9555 goto end; 9556 } 9557 end: 9558 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9559 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9560 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9561 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9562 9563 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9564 continue; 9565 9566 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 9567 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 9568 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 9569 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 9570 gaudi2_qman_set_test_mode(hdev, 9571 edma_queues_id[dcore] + 4 * edma_idx, false); 9572 } 9573 } 9574 9575 WREG32(sob_addr, 0); 9576 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 9577 9578 return rc; 9579 } 9580 9581 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val) 9582 { 9583 int rc; 9584 struct asic_fixed_properties *prop = &hdev->asic_prop; 9585 u64 size = prop->dram_end_address - prop->dram_user_base_address; 9586 9587 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 9588 9589 if (rc) 9590 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 9591 
prop->dram_user_base_address, size); 9592 return rc; 9593 } 9594 9595 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 9596 { 9597 int rc; 9598 struct asic_fixed_properties *prop = &hdev->asic_prop; 9599 u64 val = hdev->memory_scrub_val; 9600 u64 addr, size; 9601 9602 if (!hdev->memory_scrub) 9603 return 0; 9604 9605 /* scrub SRAM */ 9606 addr = prop->sram_user_base_address; 9607 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 9608 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 9609 addr, addr + size, val); 9610 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 9611 if (rc) { 9612 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 9613 return rc; 9614 } 9615 9616 /* scrub DRAM */ 9617 rc = gaudi2_scrub_device_dram(hdev, val); 9618 if (rc) { 9619 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 9620 return rc; 9621 } 9622 return 0; 9623 } 9624 9625 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 9626 { 9627 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 9628 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 9629 u32 val, size, offset; 9630 int dcore_id; 9631 9632 offset = hdev->asic_prop.first_available_cq[0] * 4; 9633 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 9634 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 9635 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 9636 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 9637 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 9638 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 9639 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 9640 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 9641 9642 /* memset dcore0 CQ registers */ 9643 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9644 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9645 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9646 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9647 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9648 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9649 9650 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 9651 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 9652 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 9653 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 9654 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 9655 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 9656 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 9657 9658 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9659 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9660 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9661 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9662 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9663 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9664 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9665 9666 cq_lbw_l_addr += DCORE_OFFSET; 9667 cq_lbw_h_addr += DCORE_OFFSET; 9668 cq_lbw_data_addr += DCORE_OFFSET; 9669 cq_base_l_addr += DCORE_OFFSET; 9670 cq_base_h_addr += DCORE_OFFSET; 9671 cq_size_addr += DCORE_OFFSET; 9672 } 9673 9674 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 9675 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 
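/* Descriptive note: the user-available monitor STATUS registers are reset below to a value with only the protection bit set, over the range from the first available user monitor up to the secured SM block; the matching MON_CONFIG registers for the same range are then cleared. */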
9676 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT; 9677 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset); 9678 9679 /* memset dcore0 monitors */ 9680 gaudi2_memset_device_lbw(hdev, addr, size, val); 9681 9682 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset; 9683 gaudi2_memset_device_lbw(hdev, addr, size, 0); 9684 9685 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET; 9686 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET; 9687 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0; 9688 9689 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9690 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val); 9691 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0); 9692 mon_sts_addr += DCORE_OFFSET; 9693 mon_cfg_addr += DCORE_OFFSET; 9694 } 9695 9696 offset = hdev->asic_prop.first_available_user_sob[0] * 4; 9697 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset; 9698 val = 0; 9699 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - 9700 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9701 9702 /* memset dcore0 sobs */ 9703 gaudi2_memset_device_lbw(hdev, addr, size, val); 9704 9705 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET; 9706 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0; 9707 9708 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9709 gaudi2_memset_device_lbw(hdev, addr, size, val); 9710 addr += DCORE_OFFSET; 9711 } 9712 9713 /* Flush all WREG to prevent race */ 9714 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9715 } 9716 9717 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev) 9718 { 9719 u32 reg_base, hw_queue_id; 9720 9721 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0; 9722 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9723 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9724 continue; 9725 9726 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9727 9728 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9729 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9730 } 9731 9732 /* Flush all WREG to prevent race */ 9733 RREG32(mmPDMA0_QM_ARB_CFG_0); 9734 } 9735 9736 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev) 9737 { 9738 u32 reg_base, hw_queue_id; 9739 9740 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3; 9741 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9742 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9743 continue; 9744 9745 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9746 9747 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9748 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9749 } 9750 9751 /* Flush all WREG to prevent race */ 9752 RREG32(mmPDMA0_QM_ARB_CFG_0); 9753 } 9754 9755 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid) 9756 { 9757 return 0; 9758 } 9759 9760 static void gaudi2_restore_phase_topology(struct hl_device *hdev) 9761 { 9762 } 9763 9764 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx, 9765 struct dup_block_ctx *cfg_ctx) 9766 { 9767 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off; 9768 u8 seq; 9769 int i; 9770 9771 for (i = 0 ; i < cfg_ctx->instances ; i++) { 9772 seq = block_idx * cfg_ctx->instances + i; 9773 9774 /* skip disabled instance */ 9775 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq))) 9776 continue; 9777 9778 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off, 9779 
cfg_ctx->data); 9780 } 9781 } 9782 9783 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx, 9784 u64 mask) 9785 { 9786 int i; 9787 9788 cfg_ctx->enabled_mask = mask; 9789 9790 for (i = 0 ; i < cfg_ctx->blocks ; i++) 9791 gaudi2_init_block_instances(hdev, i, cfg_ctx); 9792 } 9793 9794 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx) 9795 { 9796 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX); 9797 } 9798 9799 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) 9800 { 9801 void *host_mem_virtual_addr; 9802 dma_addr_t host_mem_dma_addr; 9803 u64 reserved_va_base; 9804 u32 pos, size_left, size_to_dma; 9805 struct hl_ctx *ctx; 9806 int rc = 0; 9807 9808 /* Fetch the ctx */ 9809 ctx = hl_get_compute_ctx(hdev); 9810 if (!ctx) { 9811 dev_err(hdev->dev, "No ctx available\n"); 9812 return -EINVAL; 9813 } 9814 9815 /* Allocate buffers for read and for poll */ 9816 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr, 9817 GFP_KERNEL | __GFP_ZERO); 9818 if (host_mem_virtual_addr == NULL) { 9819 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n"); 9820 rc = -ENOMEM; 9821 goto put_ctx; 9822 } 9823 9824 /* Reserve VM region on asic side */ 9825 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M, 9826 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 9827 if (!reserved_va_base) { 9828 dev_err(hdev->dev, "Failed to reserve vmem on asic\n"); 9829 rc = -ENOMEM; 9830 goto free_data_buffer; 9831 } 9832 9833 /* Create mapping on asic side */ 9834 mutex_lock(&hdev->mmu_lock); 9835 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); 9836 hl_mmu_invalidate_cache_range(hdev, false, 9837 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, 9838 ctx->asid, reserved_va_base, SZ_2M); 9839 mutex_unlock(&hdev->mmu_lock); 9840 if (rc) { 9841 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); 9842 goto unreserve_va; 9843 } 9844 9845 /* Enable MMU on KDMA */ 9846 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); 9847 9848 pos = 0; 9849 size_left = size; 9850 size_to_dma = SZ_2M; 9851 9852 while (size_left > 0) { 9853 if (size_left < SZ_2M) 9854 size_to_dma = size_left; 9855 9856 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false); 9857 if (rc) 9858 break; 9859 9860 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma); 9861 9862 if (size_left <= SZ_2M) 9863 break; 9864 9865 pos += SZ_2M; 9866 addr += SZ_2M; 9867 size_left -= SZ_2M; 9868 } 9869 9870 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); 9871 9872 mutex_lock(&hdev->mmu_lock); 9873 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 9874 hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, 9875 ctx->asid, reserved_va_base, SZ_2M); 9876 mutex_unlock(&hdev->mmu_lock); 9877 unreserve_va: 9878 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); 9879 free_data_buffer: 9880 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr); 9881 put_ctx: 9882 hl_ctx_put(ctx); 9883 9884 return rc; 9885 } 9886 9887 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx) 9888 { 9889 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9890 int min_alloc_order, rc; 9891 9892 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9893 return 0; 9894 9895 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 9896 HOST_SPACE_INTERNAL_CB_SZ, 9897 
&hdev->internal_cb_pool_dma_addr, 9898 GFP_KERNEL | __GFP_ZERO); 9899 9900 if (!hdev->internal_cb_pool_virt_addr) 9901 return -ENOMEM; 9902 9903 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev), 9904 gaudi2_get_wait_cb_size(hdev))); 9905 9906 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 9907 if (!hdev->internal_cb_pool) { 9908 dev_err(hdev->dev, "Failed to create internal CB pool\n"); 9909 rc = -ENOMEM; 9910 goto free_internal_cb_pool; 9911 } 9912 9913 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr, 9914 HOST_SPACE_INTERNAL_CB_SZ, -1); 9915 if (rc) { 9916 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n"); 9917 rc = -EFAULT; 9918 goto destroy_internal_cb_pool; 9919 } 9920 9921 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 9922 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 9923 9924 if (!hdev->internal_cb_va_base) { 9925 rc = -ENOMEM; 9926 goto destroy_internal_cb_pool; 9927 } 9928 9929 mutex_lock(&hdev->mmu_lock); 9930 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 9931 HOST_SPACE_INTERNAL_CB_SZ); 9932 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 9933 mutex_unlock(&hdev->mmu_lock); 9934 9935 if (rc) 9936 goto unreserve_internal_cb_pool; 9937 9938 return 0; 9939 9940 unreserve_internal_cb_pool: 9941 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9942 destroy_internal_cb_pool: 9943 gen_pool_destroy(hdev->internal_cb_pool); 9944 free_internal_cb_pool: 9945 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9946 hdev->internal_cb_pool_dma_addr); 9947 9948 return rc; 9949 } 9950 9951 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx) 9952 { 9953 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9954 9955 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9956 return; 9957 9958 mutex_lock(&hdev->mmu_lock); 9959 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9960 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9961 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 9962 mutex_unlock(&hdev->mmu_lock); 9963 9964 gen_pool_destroy(hdev->internal_cb_pool); 9965 9966 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9967 hdev->internal_cb_pool_dma_addr); 9968 } 9969 9970 static void gaudi2_restore_user_registers(struct hl_device *hdev) 9971 { 9972 gaudi2_restore_user_sm_registers(hdev); 9973 gaudi2_restore_user_qm_registers(hdev); 9974 } 9975 9976 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9977 { 9978 struct hl_device *hdev = ctx->hdev; 9979 struct asic_fixed_properties *prop = &hdev->asic_prop; 9980 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9981 int rc; 9982 9983 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9984 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true); 9985 if (rc) 9986 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n", 9987 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 9988 9989 return rc; 9990 } 9991 9992 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9993 { 9994 struct hl_device *hdev = ctx->hdev; 9995 struct asic_fixed_properties *prop = &hdev->asic_prop; 9996 int rc; 9997 9998 rc = hl_mmu_unmap_page(ctx, 
RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9999 prop->pmmu.page_size, true); 10000 if (rc) 10001 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n", 10002 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 10003 } 10004 10005 static int gaudi2_ctx_init(struct hl_ctx *ctx) 10006 { 10007 int rc; 10008 10009 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); 10010 if (rc) 10011 return rc; 10012 10013 /* No need to clear user registers if the device has just 10014 * been reset; restore only the NIC QM registers 10015 */ 10016 if (ctx->hdev->reset_upon_device_release) 10017 gaudi2_restore_nic_qm_registers(ctx->hdev); 10018 else 10019 gaudi2_restore_user_registers(ctx->hdev); 10020 10021 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx); 10022 if (rc) 10023 return rc; 10024 10025 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx); 10026 if (rc) 10027 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 10028 10029 return rc; 10030 } 10031 10032 static void gaudi2_ctx_fini(struct hl_ctx *ctx) 10033 { 10034 if (ctx->asid == HL_KERNEL_ASID_ID) 10035 return; 10036 10037 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 10038 10039 gaudi2_unmap_virtual_msix_doorbell_memory(ctx); 10040 } 10041 10042 static int gaudi2_pre_schedule_cs(struct hl_cs *cs) 10043 { 10044 struct hl_device *hdev = cs->ctx->hdev; 10045 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1); 10046 u32 mon_payload, sob_id, mon_id; 10047 10048 if (!cs_needs_completion(cs)) 10049 return 0; 10050 10051 /* 10052 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion 10053 * mechanism. Each SOB/MON pair is used for a pending CS with the same 10054 * cyclic index. The SOB value is increased when each of the CS jobs is 10055 * completed. When the SOB reaches the number of CS jobs, the monitor 10056 * generates an MSI-X interrupt.
10057 */ 10058 10059 sob_id = mon_id = index; 10060 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | 10061 (1 << CQ_ENTRY_READY_SHIFT) | index; 10062 10063 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload, 10064 cs->jobs_cnt); 10065 10066 return 0; 10067 } 10068 10069 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 10070 { 10071 return HL_INVALID_QUEUE; 10072 } 10073 10074 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) 10075 { 10076 struct hl_cb *cb = data; 10077 struct packet_msg_short *pkt; 10078 u32 value, ctl, pkt_size = sizeof(*pkt); 10079 10080 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size); 10081 memset(pkt, 0, pkt_size); 10082 10083 /* Inc by 1, Mode ADD */ 10084 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 10085 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 10086 10087 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 10088 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */ 10089 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10090 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb); 10091 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10092 10093 pkt->value = cpu_to_le32(value); 10094 pkt->ctl = cpu_to_le32(ctl); 10095 10096 return size + pkt_size; 10097 } 10098 10099 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr) 10100 { 10101 u32 ctl, pkt_size = sizeof(*pkt); 10102 10103 memset(pkt, 0, pkt_size); 10104 10105 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10106 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10107 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10108 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10109 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0); 10110 10111 pkt->value = cpu_to_le32(value); 10112 pkt->ctl = cpu_to_le32(ctl); 10113 10114 return pkt_size; 10115 } 10116 10117 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt, 10118 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr) 10119 { 10120 u32 ctl, value, pkt_size = sizeof(*pkt); 10121 u8 mask; 10122 10123 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 10124 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask); 10125 return 0; 10126 } 10127 10128 memset(pkt, 0, pkt_size); 10129 10130 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 10131 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 10132 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/ 10133 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask); 10134 10135 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10136 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10137 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10138 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10139 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10140 10141 pkt->value = cpu_to_le32(value); 10142 pkt->ctl = cpu_to_le32(ctl); 10143 10144 return pkt_size; 10145 } 10146 10147 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) 10148 { 10149 u32 ctl, cfg, pkt_size = sizeof(*pkt); 10150 10151 memset(pkt, 0, pkt_size); 10152 10153 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 10154 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 10155 cfg |= 
FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2); 10156 10157 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 10158 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10159 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10160 10161 pkt->cfg = cpu_to_le32(cfg); 10162 pkt->ctl = cpu_to_le32(ctl); 10163 10164 return pkt_size; 10165 } 10166 10167 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) 10168 { 10169 struct hl_cb *cb = prop->data; 10170 void *buf = (void *) (uintptr_t) (cb->kernel_address); 10171 10172 u64 monitor_base, fence_addr = 0; 10173 u32 stream_index, size = prop->size; 10174 u16 msg_addr_offset; 10175 10176 stream_index = prop->q_idx % 4; 10177 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] + 10178 QM_FENCE2_OFFSET + stream_index * 4; 10179 10180 /* 10181 * monitor_base should be the content of the base0 address registers, 10182 * so it will be added to the msg short offsets 10183 */ 10184 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 10185 10186 /* First monitor config packet: low address of the sync */ 10187 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) - 10188 monitor_base; 10189 10190 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset); 10191 10192 /* Second monitor config packet: high address of the sync */ 10193 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) - 10194 monitor_base; 10195 10196 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset); 10197 10198 /* 10199 * Third monitor config packet: the payload, i.e. what to write when the 10200 * sync triggers 10201 */ 10202 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) - 10203 monitor_base; 10204 10205 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset); 10206 10207 /* Fourth monitor config packet: bind the monitor to a sync object */ 10208 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base; 10209 10210 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask, 10211 prop->sob_val, msg_addr_offset); 10212 10213 /* Fence packet */ 10214 size += gaudi2_add_fence_pkt(buf + size); 10215 10216 return size; 10217 } 10218 10219 static void gaudi2_reset_sob(struct hl_device *hdev, void *data) 10220 { 10221 struct hl_hw_sob *hw_sob = data; 10222 10223 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); 10224 10225 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0); 10226 10227 kref_init(&hw_sob->kref); 10228 } 10229 10230 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group) 10231 { 10232 } 10233 10234 static u64 gaudi2_get_device_time(struct hl_device *hdev) 10235 { 10236 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; 10237 10238 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); 10239 } 10240 10241 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs) 10242 { 10243 return 0; 10244 } 10245 10246 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx, 10247 struct hl_cs *cs, u32 wait_queue_id, 10248 u32 collective_engine_id, u32 encaps_signal_offset) 10249 { 10250 return -EINVAL; 10251 } 10252 10253 /* 10254 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address 10255 * to DMMU page-size address (64MB) before mapping it in 10256 * the MMU. 
10257 * The operation is performed on both the virtual and physical addresses. 10258 * for device with 6 HBMs the scramble is: 10259 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48] 10260 * 10261 * Example: 10262 * ============================================================================= 10263 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA 10264 * Phys address in MMU last 10265 * HOP 10266 * ============================================================================= 10267 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1 10268 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3 10269 * ============================================================================= 10270 */ 10271 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr) 10272 { 10273 struct asic_fixed_properties *prop = &hdev->asic_prop; 10274 u32 divisor, mod_va; 10275 u64 div_va; 10276 10277 /* accept any address in the DRAM address space */ 10278 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE, 10279 VA_HBM_SPACE_END)) { 10280 10281 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10282 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va); 10283 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) | 10284 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) | 10285 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT); 10286 } 10287 10288 return raw_addr; 10289 } 10290 10291 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr) 10292 { 10293 struct asic_fixed_properties *prop = &hdev->asic_prop; 10294 u32 divisor, mod_va; 10295 u64 div_va; 10296 10297 /* accept any address in the DRAM address space */ 10298 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE, 10299 VA_HBM_SPACE_END)) { 10300 10301 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10302 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, 10303 PAGE_SIZE_64MB, &mod_va); 10304 10305 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) + 10306 (div_va * divisor + mod_va)); 10307 } 10308 10309 return scrambled_addr; 10310 } 10311 10312 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id) 10313 { 10314 u32 base = 0, dcore_id, dec_id; 10315 10316 if (core_id >= NUMBER_OF_DEC) { 10317 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id); 10318 goto out; 10319 } 10320 10321 if (core_id < 8) { 10322 dcore_id = core_id / NUM_OF_DEC_PER_DCORE; 10323 dec_id = core_id % NUM_OF_DEC_PER_DCORE; 10324 10325 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET + 10326 dec_id * DCORE_VDEC_OFFSET; 10327 } else { 10328 /* PCIe Shared Decoder */ 10329 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET); 10330 } 10331 out: 10332 return base; 10333 } 10334 10335 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 10336 u32 *block_size, u32 *block_id) 10337 { 10338 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10339 int i; 10340 10341 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) { 10342 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) { 10343 *block_id = i; 10344 if (block_size) 10345 *block_size = gaudi2->mapped_blocks[i].size; 10346 return 0; 10347 } 10348 } 10349 10350 dev_err(hdev->dev, "Invalid block address %#llx", block_addr); 10351 10352 return -EINVAL; 10353 } 10354 10355 static int gaudi2_block_mmap(struct hl_device *hdev, 
struct vm_area_struct *vma, 10356 u32 block_id, u32 block_size) 10357 { 10358 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10359 u64 offset_in_bar; 10360 u64 address; 10361 int rc; 10362 10363 if (block_id >= NUM_USER_MAPPED_BLOCKS) { 10364 dev_err(hdev->dev, "Invalid block id %u", block_id); 10365 return -EINVAL; 10366 } 10367 10368 /* we allow mapping only an entire block */ 10369 if (block_size != gaudi2->mapped_blocks[block_id].size) { 10370 dev_err(hdev->dev, "Invalid block size %u", block_size); 10371 return -EINVAL; 10372 } 10373 10374 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR; 10375 10376 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar; 10377 10378 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 10379 VM_DONTCOPY | VM_NORESERVE); 10380 10381 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, 10382 block_size, vma->vm_page_prot); 10383 if (rc) 10384 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 10385 10386 return rc; 10387 } 10388 10389 static void gaudi2_enable_events_from_fw(struct hl_device *hdev) 10390 { 10391 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10392 10393 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 10394 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq); 10395 10396 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 10397 WREG32(irq_handler_offset, 10398 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id); 10399 } 10400 10401 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base) 10402 { 10403 switch (mmu_id) { 10404 case HW_CAP_DCORE0_DMMU0: 10405 *mmu_base = mmDCORE0_HMMU0_MMU_BASE; 10406 break; 10407 case HW_CAP_DCORE0_DMMU1: 10408 *mmu_base = mmDCORE0_HMMU1_MMU_BASE; 10409 break; 10410 case HW_CAP_DCORE0_DMMU2: 10411 *mmu_base = mmDCORE0_HMMU2_MMU_BASE; 10412 break; 10413 case HW_CAP_DCORE0_DMMU3: 10414 *mmu_base = mmDCORE0_HMMU3_MMU_BASE; 10415 break; 10416 case HW_CAP_DCORE1_DMMU0: 10417 *mmu_base = mmDCORE1_HMMU0_MMU_BASE; 10418 break; 10419 case HW_CAP_DCORE1_DMMU1: 10420 *mmu_base = mmDCORE1_HMMU1_MMU_BASE; 10421 break; 10422 case HW_CAP_DCORE1_DMMU2: 10423 *mmu_base = mmDCORE1_HMMU2_MMU_BASE; 10424 break; 10425 case HW_CAP_DCORE1_DMMU3: 10426 *mmu_base = mmDCORE1_HMMU3_MMU_BASE; 10427 break; 10428 case HW_CAP_DCORE2_DMMU0: 10429 *mmu_base = mmDCORE2_HMMU0_MMU_BASE; 10430 break; 10431 case HW_CAP_DCORE2_DMMU1: 10432 *mmu_base = mmDCORE2_HMMU1_MMU_BASE; 10433 break; 10434 case HW_CAP_DCORE2_DMMU2: 10435 *mmu_base = mmDCORE2_HMMU2_MMU_BASE; 10436 break; 10437 case HW_CAP_DCORE2_DMMU3: 10438 *mmu_base = mmDCORE2_HMMU3_MMU_BASE; 10439 break; 10440 case HW_CAP_DCORE3_DMMU0: 10441 *mmu_base = mmDCORE3_HMMU0_MMU_BASE; 10442 break; 10443 case HW_CAP_DCORE3_DMMU1: 10444 *mmu_base = mmDCORE3_HMMU1_MMU_BASE; 10445 break; 10446 case HW_CAP_DCORE3_DMMU2: 10447 *mmu_base = mmDCORE3_HMMU2_MMU_BASE; 10448 break; 10449 case HW_CAP_DCORE3_DMMU3: 10450 *mmu_base = mmDCORE3_HMMU3_MMU_BASE; 10451 break; 10452 case HW_CAP_PMMU: 10453 *mmu_base = mmPMMU_HBW_MMU_BASE; 10454 break; 10455 default: 10456 return -EINVAL; 10457 } 10458 10459 return 0; 10460 } 10461 10462 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) 10463 { 10464 bool is_pmmu = (mmu_id == HW_CAP_PMMU); 10465 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10466 u32 mmu_base; 10467 10468 if (!(gaudi2->hw_cap_initialized & mmu_id)) 10469 return; 10470 10471 if 
(gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base)) 10472 return; 10473 10474 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL); 10475 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 10476 } 10477 10478 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask) 10479 { 10480 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES; 10481 10482 /* check all HMMUs */ 10483 for (i = 0 ; i < num_of_hmmus ; i++) { 10484 mmu_id = HW_CAP_DCORE0_DMMU0 << i; 10485 10486 if (mmu_cap_mask & mmu_id) 10487 gaudi2_ack_mmu_error(hdev, mmu_id); 10488 } 10489 10490 /* check PMMU */ 10491 if (mmu_cap_mask & HW_CAP_PMMU) 10492 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU); 10493 10494 return 0; 10495 } 10496 10497 static void gaudi2_get_msi_info(__le32 *table) 10498 { 10499 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); 10500 } 10501 10502 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) 10503 { 10504 switch (pll_idx) { 10505 case HL_GAUDI2_CPU_PLL: return CPU_PLL; 10506 case HL_GAUDI2_PCI_PLL: return PCI_PLL; 10507 case HL_GAUDI2_NIC_PLL: return NIC_PLL; 10508 case HL_GAUDI2_DMA_PLL: return DMA_PLL; 10509 case HL_GAUDI2_MESH_PLL: return MESH_PLL; 10510 case HL_GAUDI2_MME_PLL: return MME_PLL; 10511 case HL_GAUDI2_TPC_PLL: return TPC_PLL; 10512 case HL_GAUDI2_IF_PLL: return IF_PLL; 10513 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL; 10514 case HL_GAUDI2_HBM_PLL: return HBM_PLL; 10515 case HL_GAUDI2_VID_PLL: return VID_PLL; 10516 case HL_GAUDI2_MSS_PLL: return MSS_PLL; 10517 default: return -EINVAL; 10518 } 10519 } 10520 10521 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) 10522 { 10523 /* Not implemented */ 10524 return 0; 10525 } 10526 10527 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon) 10528 { 10529 /* Not implemented */ 10530 return 0; 10531 } 10532 10533 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset, 10534 struct hl_device *hdev, struct hl_mon_state_dump *mon) 10535 { 10536 /* Not implemented */ 10537 return 0; 10538 } 10539 10540 10541 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset, 10542 u64 status_base_offset, enum hl_sync_engine_type engine_type, 10543 u32 engine_id, char **buf, size_t *size, size_t *offset) 10544 { 10545 /* Not implemented */ 10546 return 0; 10547 } 10548 10549 10550 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = { 10551 .monitor_valid = gaudi2_monitor_valid, 10552 .print_single_monitor = gaudi2_print_single_monitor, 10553 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map, 10554 .print_fences_single_engine = gaudi2_print_fences_single_engine, 10555 }; 10556 10557 static void gaudi2_state_dump_init(struct hl_device *hdev) 10558 { 10559 /* Not implemented */ 10560 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props; 10561 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs; 10562 } 10563 10564 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id) 10565 { 10566 return 0; 10567 } 10568 10569 static u32 *gaudi2_get_stream_master_qid_arr(void) 10570 { 10571 return NULL; 10572 } 10573 10574 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, 10575 struct attribute_group *dev_vrm_attr_grp) 10576 { 10577 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); 10578 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp); 10579 } 10580 10581 static int gaudi2_mmu_get_real_page_size(struct 
hl_device *hdev, struct hl_mmu_properties *mmu_prop, 10582 u32 page_size, u32 *real_page_size, bool is_dram_addr) 10583 { 10584 struct asic_fixed_properties *prop = &hdev->asic_prop; 10585 10586 /* for host pages the page size must be a multiple of the MMU page size */ 10587 if (!is_dram_addr) { 10588 if (page_size % mmu_prop->page_size) 10589 goto page_size_err; 10590 10591 *real_page_size = mmu_prop->page_size; 10592 return 0; 10593 } 10594 10595 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size)) 10596 goto page_size_err; 10597 10598 /* 10599 * The MMU page size is different from the DRAM page size (more precisely, a DMMU page is 10600 * greater than a DRAM page). 10601 * For this reason, work with the DRAM page size and let the MMU scrambling routine handle 10602 * this mismatch when calculating the address to place in the MMU page table 10603 * (in that case also make sure that the dram_page_size is not greater than the 10604 * MMU page size). 10605 */ 10606 *real_page_size = prop->dram_page_size; 10607 10608 return 0; 10609 10610 page_size_err: 10611 dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", 10612 page_size, mmu_prop->page_size >> 10); 10613 return -EFAULT; 10614 } 10615 10616 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data) 10617 { 10618 return -EOPNOTSUPP; 10619 } 10620 10621 int gaudi2_send_device_activity(struct hl_device *hdev, bool open) 10622 { 10623 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10624 10625 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 10626 return 0; 10627 10628 return hl_fw_send_device_activity(hdev, open); 10629 } 10630 10631 static const struct hl_asic_funcs gaudi2_funcs = { 10632 .early_init = gaudi2_early_init, 10633 .early_fini = gaudi2_early_fini, 10634 .late_init = gaudi2_late_init, 10635 .late_fini = gaudi2_late_fini, 10636 .sw_init = gaudi2_sw_init, 10637 .sw_fini = gaudi2_sw_fini, 10638 .hw_init = gaudi2_hw_init, 10639 .hw_fini = gaudi2_hw_fini, 10640 .halt_engines = gaudi2_halt_engines, 10641 .suspend = gaudi2_suspend, 10642 .resume = gaudi2_resume, 10643 .mmap = gaudi2_mmap, 10644 .ring_doorbell = gaudi2_ring_doorbell, 10645 .pqe_write = gaudi2_pqe_write, 10646 .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent, 10647 .asic_dma_free_coherent = gaudi2_dma_free_coherent, 10648 .scrub_device_mem = gaudi2_scrub_device_mem, 10649 .scrub_device_dram = gaudi2_scrub_device_dram, 10650 .get_int_queue_base = NULL, 10651 .test_queues = gaudi2_test_queues, 10652 .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc, 10653 .asic_dma_pool_free = gaudi2_dma_pool_free, 10654 .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc, 10655 .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free, 10656 .asic_dma_unmap_single = gaudi2_dma_unmap_single, 10657 .asic_dma_map_single = gaudi2_dma_map_single, 10658 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, 10659 .cs_parser = gaudi2_cs_parser, 10660 .asic_dma_map_sgtable = hl_dma_map_sgtable, 10661 .add_end_of_cb_packets = NULL, 10662 .update_eq_ci = gaudi2_update_eq_ci, 10663 .context_switch = gaudi2_context_switch, 10664 .restore_phase_topology = gaudi2_restore_phase_topology, 10665 .debugfs_read_dma = gaudi2_debugfs_read_dma, 10666 .add_device_attr = gaudi2_add_device_attr, 10667 .handle_eqe = gaudi2_handle_eqe, 10668 .get_events_stat = gaudi2_get_events_stat, 10669 .read_pte = NULL, 10670 .write_pte = NULL, 10671 .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache, 10672 .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10673 .mmu_prefetch_cache_range = NULL, 10674 .send_heartbeat = gaudi2_send_heartbeat, 10675 .debug_coresight = gaudi2_debug_coresight, 10676 .is_device_idle = gaudi2_is_device_idle, 10677 .compute_reset_late_init = gaudi2_compute_reset_late_init, 10678 .hw_queues_lock = gaudi2_hw_queues_lock, 10679 .hw_queues_unlock = gaudi2_hw_queues_unlock, 10680 .get_pci_id = gaudi2_get_pci_id, 10681 .get_eeprom_data = gaudi2_get_eeprom_data, 10682 .get_monitor_dump = gaudi2_get_monitor_dump, 10683 .send_cpu_message = gaudi2_send_cpu_message, 10684 .pci_bars_map = gaudi2_pci_bars_map, 10685 .init_iatu = gaudi2_init_iatu, 10686 .rreg = hl_rreg, 10687 .wreg = hl_wreg, 10688 .halt_coresight = gaudi2_halt_coresight, 10689 .ctx_init = gaudi2_ctx_init, 10690 .ctx_fini = gaudi2_ctx_fini, 10691 .pre_schedule_cs = gaudi2_pre_schedule_cs, 10692 .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq, 10693 .load_firmware_to_device = NULL, 10694 .load_boot_fit_to_device = NULL, 10695 .get_signal_cb_size = gaudi2_get_signal_cb_size, 10696 .get_wait_cb_size = gaudi2_get_wait_cb_size, 10697 .gen_signal_cb = gaudi2_gen_signal_cb, 10698 .gen_wait_cb = gaudi2_gen_wait_cb, 10699 .reset_sob = gaudi2_reset_sob, 10700 .reset_sob_group = gaudi2_reset_sob_group, 10701 .get_device_time = gaudi2_get_device_time, 10702 .pb_print_security_errors = gaudi2_pb_print_security_errors, 10703 .collective_wait_init_cs = gaudi2_collective_wait_init_cs, 10704 .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs, 10705 .get_dec_base_addr = gaudi2_get_dec_base_addr, 10706 .scramble_addr = gaudi2_mmu_scramble_addr, 10707 .descramble_addr = gaudi2_mmu_descramble_addr, 10708 .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors, 10709 .get_hw_block_id = gaudi2_get_hw_block_id, 10710 .hw_block_mmap = gaudi2_block_mmap, 10711 .enable_events_from_fw = gaudi2_enable_events_from_fw, 10712 .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error, 10713 .get_msi_info = gaudi2_get_msi_info, 10714 .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx, 10715 .init_firmware_preload_params = gaudi2_init_firmware_preload_params, 10716 .init_firmware_loader = gaudi2_init_firmware_loader, 10717 .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm, 10718 .state_dump_init = gaudi2_state_dump_init, 10719 .get_sob_addr = &gaudi2_get_sob_addr, 10720 .set_pci_memory_regions = gaudi2_set_pci_memory_regions, 10721 .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr, 10722 .check_if_razwi_happened = gaudi2_check_if_razwi_happened, 10723 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, 10724 .access_dev_mem = hl_access_dev_mem, 10725 .set_dram_bar_base = gaudi2_set_hbm_bar_base, 10726 .set_engine_cores = gaudi2_set_engine_cores, 10727 .send_device_activity = gaudi2_send_device_activity, 10728 .set_dram_properties = gaudi2_set_dram_properties, 10729 .set_binning_masks = gaudi2_set_binning_masks, 10730 }; 10731 10732 void gaudi2_set_asic_funcs(struct hl_device *hdev) 10733 { 10734 hdev->asic_funcs = &gaudi2_funcs; 10735 } 10736
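/* Illustrative note (assumption, not part of the upstream driver): once gaudi2_set_asic_funcs() has run during device initialization, the common habanalabs core is expected to reach the ASIC-specific code only through this ops table, e.g. hdev->asic_funcs->scrub_device_mem(hdev), rather than calling the gaudi2_* functions above directly. */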