1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2020-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include "gaudi2P.h" 9 #include "gaudi2_masks.h" 10 #include "../include/hw_ip/mmu/mmu_general.h" 11 #include "../include/hw_ip/mmu/mmu_v2_0.h" 12 #include "../include/gaudi2/gaudi2_packets.h" 13 #include "../include/gaudi2/gaudi2_reg_map.h" 14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h" 15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h" 16 17 #include <linux/module.h> 18 #include <linux/pci.h> 19 #include <linux/hwmon.h> 20 #include <linux/iommu.h> 21 22 #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ 23 24 #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ 25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ 26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ 27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ 28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */ 29 #define GAUDI2_RESET_POLL_CNT 3 30 #define GAUDI2_RESET_WAIT_MSEC 1 /* 1ms */ 31 #define GAUDI2_CPU_RESET_WAIT_MSEC 100 /* 100ms */ 32 #define GAUDI2_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ 33 #define GAUDI2_CB_POOL_CB_CNT 512 34 #define GAUDI2_CB_POOL_CB_SIZE SZ_128K /* 128KB */ 35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC 25000000 /* 25s */ 37 #define GAUDI2_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ 38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ 39 40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT 3 41 42 /* 43 * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs 44 * and the code relies on that value (for array size etc..) we define another value 45 * for MAX faulty TPCs which reflects the cluster binning requirements 46 */ 47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS 1 48 #define MAX_FAULTY_XBARS 1 49 #define MAX_FAULTY_EDMAS 1 50 #define MAX_FAULTY_DECODERS 1 51 52 #define GAUDI2_TPC_FULL_MASK 0x1FFFFFF 53 #define GAUDI2_HIF_HMMU_FULL_MASK 0xFFFF 54 #define GAUDI2_DECODER_FULL_MASK 0x3FF 55 56 #define GAUDI2_NA_EVENT_CAUSE 0xFF 57 #define GAUDI2_NUM_OF_QM_ERR_CAUSE 18 58 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE 25 59 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3 60 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14 61 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3 62 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2 63 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22 64 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30 65 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 66 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 67 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 68 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7 69 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8 70 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19 71 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE 9 72 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE 3 73 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE 3 74 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE 2 75 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE 2 76 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE 2 77 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE 5 78 79 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 10) 80 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 200) 81 #define GAUDI2_ARB_WDT_TIMEOUT (0x1000000) 82 83 #define GAUDI2_VDEC_TIMEOUT_USEC 10000 /* 10ms */ 84 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC (GAUDI2_VDEC_TIMEOUT_USEC * 100) 85 86 #define KDMA_TIMEOUT_USEC USEC_PER_SEC 87 88 #define IS_DMA_IDLE(dma_core_idle_ind_mask) \ 89 (!((dma_core_idle_ind_mask) & \ 90 
((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \ 91 (DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK)))) 92 93 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) 94 95 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK)) 96 97 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \ 98 ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \ 99 (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \ 100 (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK))) 101 102 #define PCIE_DEC_EN_MASK 0x300 103 #define DEC_WORK_STATE_IDLE 0 104 #define DEC_WORK_STATE_PEND 3 105 #define IS_DEC_IDLE(dec_swreg15) \ 106 (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \ 107 ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND) 108 109 /* HBM MMU address scrambling parameters */ 110 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE SZ_8M 111 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT 26 112 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT 0 113 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK DRAM_VA_HINT_MASK 114 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR 16 115 #define MMU_RANGE_INV_VA_LSB_SHIFT 12 116 #define MMU_RANGE_INV_VA_MSB_SHIFT 44 117 #define MMU_RANGE_INV_EN_SHIFT 0 118 #define MMU_RANGE_INV_ASID_EN_SHIFT 1 119 #define MMU_RANGE_INV_ASID_SHIFT 2 120 121 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has 122 * a 2 entries FIFO, and hence it is not enabled for it. 123 */ 124 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0) 125 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0) 126 127 #define GAUDI2_MAX_STRING_LEN 64 128 129 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \ 130 GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1) 131 132 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) 133 134 enum hl_pmmu_fatal_cause { 135 LATENCY_RD_OUT_FIFO_OVERRUN, 136 LATENCY_WR_OUT_FIFO_OVERRUN, 137 }; 138 139 enum hl_pcie_drain_ind_cause { 140 LBW_AXI_DRAIN_IND, 141 HBW_AXI_DRAIN_IND 142 }; 143 144 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = { 145 [HBM_ID0] = 0xFFFC, 146 [HBM_ID1] = 0xFFCF, 147 [HBM_ID2] = 0xF7F7, 148 [HBM_ID3] = 0x7F7F, 149 [HBM_ID4] = 0xFCFF, 150 [HBM_ID5] = 0xCFFF, 151 }; 152 153 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = { 154 [0] = HBM_ID0, 155 [1] = HBM_ID1, 156 [2] = HBM_ID4, 157 [3] = HBM_ID5, 158 }; 159 160 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = { 161 [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0, 162 [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2, 163 [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1, 164 [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3, 165 [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2, 166 [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4, 167 [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3, 168 [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5, 169 }; 170 171 static const int gaudi2_qman_async_event_id[] = { 172 [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM, 173 [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM, 174 [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM, 175 [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM, 176 [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM, 177 [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM, 178 [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM, 179 [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM, 180 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = 
GAUDI2_EVENT_HDMA0_QM, 181 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM, 182 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM, 183 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM, 184 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM, 185 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM, 186 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM, 187 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM, 188 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM, 189 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM, 190 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM, 191 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM, 192 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM, 193 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM, 194 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM, 195 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM, 196 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM, 197 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM, 198 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM, 199 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM, 200 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM, 201 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM, 202 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM, 203 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM, 204 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM, 205 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM, 206 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM, 207 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM, 208 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM, 209 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM, 210 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM, 211 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM, 212 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM, 213 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM, 214 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM, 215 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM, 216 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM, 217 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM, 218 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM, 219 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM, 220 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM, 221 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM, 222 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM, 223 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM, 224 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM, 225 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM, 226 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM, 227 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM, 228 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM, 229 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM, 230 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM, 231 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM, 232 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM, 233 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM, 234 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM, 235 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM, 236 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM, 237 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM, 238 
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM, 239 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM, 240 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM, 241 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM, 242 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM, 243 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM, 244 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM, 245 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM, 246 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM, 247 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM, 248 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM, 249 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM, 250 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM, 251 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM, 252 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM, 253 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM, 254 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM, 255 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM, 256 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM, 257 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM, 258 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM, 259 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM, 260 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM, 261 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM, 262 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM, 263 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM, 264 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM, 265 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM, 266 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM, 267 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM, 268 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM, 269 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM, 270 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM, 271 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM, 272 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM, 273 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM, 274 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM, 275 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM, 276 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM, 277 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM, 278 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM, 279 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM, 280 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM, 281 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM, 282 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM, 283 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM, 284 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM, 285 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM, 286 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM, 287 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM, 288 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM, 289 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM, 290 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM, 291 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM, 292 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM, 293 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM, 294 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM, 295 
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM, 296 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM, 297 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM, 298 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM, 299 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM, 300 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM, 301 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM, 302 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM, 303 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM, 304 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM, 305 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM, 306 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM, 307 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM, 308 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM, 309 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM, 310 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM, 311 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM, 312 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM, 313 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM, 314 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM, 315 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM, 316 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM, 317 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM, 318 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM, 319 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM, 320 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM, 321 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM, 322 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM, 323 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM, 324 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM, 325 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM, 326 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM, 327 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM, 328 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0, 329 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0, 330 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0, 331 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0, 332 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1, 333 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1, 334 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1, 335 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1, 336 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0, 337 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0, 338 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0, 339 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0, 340 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1, 341 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1, 342 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1, 343 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1, 344 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0, 345 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0, 346 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0, 347 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0, 348 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1, 349 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1, 350 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1, 351 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1, 352 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0, 353 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0, 354 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0, 355 
[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0, 356 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1, 357 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1, 358 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1, 359 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1, 360 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0, 361 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0, 362 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0, 363 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0, 364 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1, 365 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1, 366 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1, 367 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1, 368 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0, 369 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0, 370 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0, 371 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0, 372 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1, 373 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1, 374 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1, 375 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1, 376 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0, 377 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0, 378 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0, 379 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0, 380 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1, 381 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1, 382 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1, 383 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1, 384 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0, 385 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0, 386 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0, 387 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0, 388 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1, 389 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1, 390 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1, 391 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1, 392 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0, 393 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0, 394 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0, 395 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0, 396 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1, 397 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1, 398 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1, 399 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1, 400 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0, 401 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0, 402 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0, 403 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0, 404 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1, 405 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1, 406 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1, 407 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1, 408 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0, 409 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0, 410 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0, 411 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0, 412 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1, 413 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1, 414 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1, 415 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1, 416 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0, 417 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0, 418 [GAUDI2_QUEUE_ID_NIC_22_2] = 
GAUDI2_EVENT_NIC11_QM0, 419 [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0, 420 [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1, 421 [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1, 422 [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1, 423 [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1, 424 [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 425 [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 426 [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 427 [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 428 [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 429 [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 430 [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 431 [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM 432 }; 433 434 static const int gaudi2_dma_core_async_event_id[] = { 435 [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE, 436 [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE, 437 [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE, 438 [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE, 439 [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE, 440 [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE, 441 [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE, 442 [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE, 443 [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE, 444 [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE, 445 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE, 446 }; 447 448 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = { 449 "qman sei intr", 450 "arc sei intr" 451 }; 452 453 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = { 454 "AXI_TERMINATOR WR", 455 "AXI_TERMINATOR RD", 456 "AXI SPLIT SEI Status" 457 }; 458 459 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = { 460 "cbu_bresp_sei_intr_cause", 461 "cbu_rresp_sei_intr_cause", 462 "lbu_bresp_sei_intr_cause", 463 "lbu_rresp_sei_intr_cause", 464 "cbu_axi_split_intr_cause", 465 "lbu_axi_split_intr_cause", 466 "arc_ip_excptn_sei_intr_cause", 467 "dmi_bresp_sei_intr_cause", 468 "aux2apb_err_sei_intr_cause", 469 "cfg_lbw_wr_terminated_intr_cause", 470 "cfg_lbw_rd_terminated_intr_cause", 471 "cfg_dccm_wr_terminated_intr_cause", 472 "cfg_dccm_rd_terminated_intr_cause", 473 "cfg_hbw_rd_terminated_intr_cause" 474 }; 475 476 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = { 477 "msix_vcd_hbw_sei", 478 "msix_l2c_hbw_sei", 479 "msix_nrm_hbw_sei", 480 "msix_abnrm_hbw_sei", 481 "msix_vcd_lbw_sei", 482 "msix_l2c_lbw_sei", 483 "msix_nrm_lbw_sei", 484 "msix_abnrm_lbw_sei", 485 "apb_vcd_lbw_sei", 486 "apb_l2c_lbw_sei", 487 "apb_nrm_lbw_sei", 488 "apb_abnrm_lbw_sei", 489 "dec_sei", 490 "dec_apb_sei", 491 "trc_apb_sei", 492 "lbw_mstr_if_sei", 493 "axi_split_bresp_err_sei", 494 "hbw_axi_wr_viol_sei", 495 "hbw_axi_rd_viol_sei", 496 "lbw_axi_wr_viol_sei", 497 "lbw_axi_rd_viol_sei", 498 "vcd_spi", 499 "l2c_spi", 500 "nrm_spi", 501 "abnrm_spi", 502 }; 503 504 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = { 505 "PQ AXI HBW error", 506 "CQ AXI HBW error", 507 "CP AXI HBW error", 508 "CP error due to undefined OPCODE", 509 "CP encountered STOP OPCODE", 510 "CP AXI LBW error", 511 "CP WRREG32 or WRBULK returned error", 512 "N/A", 513 "FENCE 0 inc over max value and clipped", 514 "FENCE 1 inc over max value and clipped", 515 "FENCE 2 inc over max value and clipped", 516 "FENCE 3 inc over max value and clipped", 517 "FENCE 0 dec 
under min value and clipped", 518 "FENCE 1 dec under min value and clipped", 519 "FENCE 2 dec under min value and clipped", 520 "FENCE 3 dec under min value and clipped", 521 "CPDMA Up overflow", 522 "PQC L2H error" 523 }; 524 525 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = { 526 "RSVD0", 527 "CQ AXI HBW error", 528 "CP AXI HBW error", 529 "CP error due to undefined OPCODE", 530 "CP encountered STOP OPCODE", 531 "CP AXI LBW error", 532 "CP WRREG32 or WRBULK returned error", 533 "N/A", 534 "FENCE 0 inc over max value and clipped", 535 "FENCE 1 inc over max value and clipped", 536 "FENCE 2 inc over max value and clipped", 537 "FENCE 3 inc over max value and clipped", 538 "FENCE 0 dec under min value and clipped", 539 "FENCE 1 dec under min value and clipped", 540 "FENCE 2 dec under min value and clipped", 541 "FENCE 3 dec under min value and clipped", 542 "CPDMA Up overflow", 543 "RSVD17", 544 "CQ_WR_IFIFO_CI_ERR", 545 "CQ_WR_CTL_CI_ERR", 546 "ARC_CQF_RD_ERR", 547 "ARC_CQ_WR_IFIFO_CI_ERR", 548 "ARC_CQ_WR_CTL_CI_ERR", 549 "ARC_AXI_ERR", 550 "CP_SWITCH_WDT_ERR" 551 }; 552 553 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = { 554 "Choice push while full error", 555 "Choice Q watchdog error", 556 "MSG AXI LBW returned with error" 557 }; 558 559 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = { 560 "qm_axi_err", 561 "qm_trace_fence_events", 562 "qm_sw_err", 563 "qm_cp_sw_stop", 564 "lbw_mstr_rresp_err", 565 "lbw_mstr_bresp_err", 566 "lbw_msg_slverr", 567 "hbw_msg_slverr", 568 "wbc_slverr", 569 "hbw_mstr_rresp_err", 570 "hbw_mstr_bresp_err", 571 "sb_resp_intr", 572 "mrsb_resp_intr", 573 "core_dw_status_0", 574 "core_dw_status_1", 575 "core_dw_status_2", 576 "core_dw_status_3", 577 "core_dw_status_4", 578 "core_dw_status_5", 579 "core_dw_status_6", 580 "core_dw_status_7", 581 "async_arc2cpu_sei_intr", 582 }; 583 584 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = { 585 "tpc_address_exceed_slm", 586 "tpc_div_by_0", 587 "tpc_spu_mac_overflow", 588 "tpc_spu_addsub_overflow", 589 "tpc_spu_abs_overflow", 590 "tpc_spu_fma_fp_dst_nan", 591 "tpc_spu_fma_fp_dst_inf", 592 "tpc_spu_convert_fp_dst_nan", 593 "tpc_spu_convert_fp_dst_inf", 594 "tpc_spu_fp_dst_denorm", 595 "tpc_vpu_mac_overflow", 596 "tpc_vpu_addsub_overflow", 597 "tpc_vpu_abs_overflow", 598 "tpc_vpu_convert_fp_dst_nan", 599 "tpc_vpu_convert_fp_dst_inf", 600 "tpc_vpu_fma_fp_dst_nan", 601 "tpc_vpu_fma_fp_dst_inf", 602 "tpc_vpu_fp_dst_denorm", 603 "tpc_assertions", 604 "tpc_illegal_instruction", 605 "tpc_pc_wrap_around", 606 "tpc_qm_sw_err", 607 "tpc_hbw_rresp_err", 608 "tpc_hbw_bresp_err", 609 "tpc_lbw_rresp_err", 610 "tpc_lbw_bresp_err", 611 "st_unlock_already_locked", 612 "invalid_lock_access", 613 "LD_L protection violation", 614 "ST_L protection violation", 615 }; 616 617 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = { 618 "agu_resp_intr", 619 "qman_axi_err", 620 "wap sei (wbc axi err)", 621 "arc sei", 622 "cfg access error", 623 "qm_sw_err", 624 "sbte_dbg_intr_0", 625 "sbte_dbg_intr_1", 626 "sbte_dbg_intr_2", 627 "sbte_dbg_intr_3", 628 "sbte_dbg_intr_4", 629 "sbte_prtn_intr_0", 630 "sbte_prtn_intr_1", 631 "sbte_prtn_intr_2", 632 "sbte_prtn_intr_3", 633 "sbte_prtn_intr_4", 634 }; 635 636 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { 637 "i0", 638 "i1", 639 "i2", 640 "i3", 641 "i4", 642 }; 643 644 static const 
char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = { 645 "WBC ERR RESP_0", 646 "WBC ERR RESP_1", 647 "AP SOURCE POS INF", 648 "AP SOURCE NEG INF", 649 "AP SOURCE NAN", 650 "AP RESULT POS INF", 651 "AP RESULT NEG INF", 652 }; 653 654 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = { 655 "HBW Read returned with error RRESP", 656 "HBW write returned with error BRESP", 657 "LBW write returned with error BRESP", 658 "descriptor_fifo_overflow", 659 "KDMA SB LBW Read returned with error", 660 "KDMA WBC LBW Write returned with error", 661 "TRANSPOSE ENGINE DESC FIFO OVERFLOW", 662 "WRONG CFG FOR COMMIT IN LIN DMA" 663 }; 664 665 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = { 666 "HBW/LBW Read returned with error RRESP", 667 "HBW/LBW write returned with error BRESP", 668 "LBW write returned with error BRESP", 669 "descriptor_fifo_overflow", 670 "KDMA SB LBW Read returned with error", 671 "KDMA WBC LBW Write returned with error", 672 "TRANSPOSE ENGINE DESC FIFO OVERFLOW", 673 "WRONG CFG FOR COMMIT IN LIN DMA" 674 }; 675 676 struct gaudi2_sm_sei_cause_data { 677 const char *cause_name; 678 const char *log_name; 679 u32 log_mask; 680 }; 681 682 static const struct gaudi2_sm_sei_cause_data 683 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = { 684 {"calculated SO value overflow/underflow", "SOB group ID", 0x7FF}, 685 {"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF}, 686 {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF}, 687 }; 688 689 static const char * const 690 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = { 691 "LATENCY_RD_OUT_FIFO_OVERRUN", 692 "LATENCY_WR_OUT_FIFO_OVERRUN", 693 }; 694 695 static const char * const 696 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = { 697 "LATENCY_RD_OUT_FIFO_OVERRUN", 698 "LATENCY_WR_OUT_FIFO_OVERRUN", 699 }; 700 701 static const char * const 702 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = { 703 "AXI drain HBW", 704 "AXI drain LBW", 705 }; 706 707 static const char * const 708 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = { 709 "HBW error response", 710 "LBW error response", 711 "TLP is blocked by RR" 712 }; 713 714 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = { 715 [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE, 716 [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE, 717 [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE, 718 [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE, 719 [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE, 720 [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE, 721 [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE, 722 [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE, 723 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE, 724 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE, 725 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE, 726 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE, 727 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE, 728 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE, 729 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE, 730 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE, 731 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE, 732 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE, 733 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE, 734 
[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE, 735 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE, 736 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE, 737 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE, 738 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE, 739 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE, 740 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE, 741 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE, 742 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE, 743 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE, 744 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE, 745 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE, 746 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE, 747 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE, 748 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE, 749 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE, 750 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE, 751 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE, 752 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE, 753 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE, 754 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE, 755 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE, 756 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE, 757 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE, 758 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE, 759 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE, 760 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE, 761 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE, 762 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE, 763 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE, 764 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE, 765 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE, 766 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE, 767 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE, 768 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE, 769 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE, 770 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE, 771 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE, 772 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE, 773 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE, 774 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE, 775 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE, 776 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE, 777 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE, 778 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE, 779 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE, 780 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE, 781 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE, 782 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE, 783 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE, 784 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE, 785 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE, 786 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE, 787 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE, 788 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE, 789 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE, 790 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE, 791 
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE, 792 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE, 793 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE, 794 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE, 795 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE, 796 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE, 797 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE, 798 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE, 799 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE, 800 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE, 801 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE, 802 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE, 803 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE, 804 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE, 805 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE, 806 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE, 807 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE, 808 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE, 809 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE, 810 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE, 811 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE, 812 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE, 813 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE, 814 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE, 815 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE, 816 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE, 817 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE, 818 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE, 819 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE, 820 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE, 821 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE, 822 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE, 823 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE, 824 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE, 825 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE, 826 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE, 827 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE, 828 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE, 829 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE, 830 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE, 831 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE, 832 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE, 833 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE, 834 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE, 835 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE, 836 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE, 837 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE, 838 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE, 839 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE, 840 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE, 841 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE, 842 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE, 843 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE, 844 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE, 845 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE, 846 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE, 847 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE, 
848 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE, 849 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE, 850 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE, 851 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE, 852 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE, 853 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE, 854 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE, 855 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE, 856 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE, 857 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE, 858 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE, 859 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE, 860 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE, 861 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE, 862 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE, 863 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE, 864 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE, 865 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE, 866 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE, 867 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE, 868 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE, 869 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE, 870 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE, 871 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE, 872 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE, 873 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE, 874 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE, 875 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE, 876 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE, 877 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE, 878 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE, 879 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE, 880 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE, 881 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE, 882 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE, 883 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE, 884 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE, 885 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE, 886 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE, 887 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE, 888 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE, 889 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE, 890 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE, 891 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE, 892 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE, 893 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE, 894 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE, 895 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE, 896 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE, 897 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE, 898 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE, 899 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE, 900 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE, 901 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE, 902 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE, 903 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE, 904 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE, 905 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE, 906 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE, 907 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE, 908 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE, 909 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE, 910 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE, 911 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE, 912 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE, 913 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE, 914 
[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE, 915 [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE, 916 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE, 917 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE, 918 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE, 919 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE, 920 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE, 921 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE, 922 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE, 923 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE, 924 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE, 925 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE, 926 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE, 927 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE, 928 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE, 929 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE, 930 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE, 931 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE, 932 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE, 933 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE, 934 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE, 935 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE, 936 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE, 937 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE, 938 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE, 939 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE, 940 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE, 941 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE, 942 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE, 943 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE, 944 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE, 945 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE, 946 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE, 947 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE, 948 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE, 949 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE, 950 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE, 951 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE, 952 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE, 953 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE, 954 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE, 955 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE, 956 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE, 957 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE, 958 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE, 959 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE, 960 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE, 961 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE, 962 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE, 963 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE, 964 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE, 965 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE, 966 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE, 967 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE, 968 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE, 969 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE, 970 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE, 971 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE, 972 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE, 973 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE, 974 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE 975 }; 976 977 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = { 978 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE, 979 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE, 980 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE, 981 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE, 982 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE, 983 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE, 984 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE, 985 [CPU_ID_TPC_QMAN_ARC1] = 
mmDCORE0_TPC1_QM_ARC_AUX_BASE, 986 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE, 987 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 988 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 989 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 990 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 991 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 992 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 993 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 994 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 995 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 996 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 997 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 998 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 999 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 1000 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1001 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1002 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1003 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1004 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1005 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1006 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1007 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1008 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1009 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1010 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1011 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1012 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1013 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1014 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1015 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1016 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1017 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1018 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1019 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1020 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1021 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1022 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1023 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1024 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1025 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1026 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1027 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1028 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1029 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1030 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1031 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1032 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1033 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1034 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1035 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1036 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1037 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1038 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1039 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1040 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1041 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1042 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE, 1043 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE, 1044 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1045 [CPU_ID_NIC_QMAN_ARC22] = 
mmNIC11_QM_ARC_AUX0_BASE, 1046 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1047 }; 1048 1049 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1050 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1051 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1052 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1053 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1054 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1055 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1056 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1057 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1058 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1059 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1060 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1061 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1062 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1063 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1064 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1065 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1066 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1067 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1068 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1069 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1070 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1071 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1072 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1073 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1074 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1075 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1076 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1077 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1078 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1079 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1080 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1081 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1082 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1083 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1084 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1085 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1086 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1087 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1088 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1089 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1090 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1091 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1092 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1093 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1094 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1095 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1096 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1097 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1098 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1099 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1100 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1101 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1102 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1103 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1104 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1105 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1106 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1107 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE, 1108 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1109 [CPU_ID_NIC_QMAN_ARC14] = 
mmNIC7_QM_DCCM0_BASE, 1110 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1111 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE, 1112 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1113 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1114 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1115 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1116 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1117 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1118 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1119 }; 1120 1121 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1122 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1123 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1124 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1125 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1126 }; 1127 1128 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1129 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1130 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1131 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1132 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1133 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1134 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1135 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1136 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1137 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1138 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1139 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1140 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1141 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1142 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1143 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1144 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1145 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1146 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1147 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1148 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1149 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1150 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1151 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1152 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1153 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1154 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1155 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1156 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1157 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1158 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1159 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1160 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1161 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1162 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1163 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1164 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1165 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1166 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1167 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1168 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1169 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5, 1170 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 1171 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1172 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = 
CPU_ID_TPC_QMAN_ARC5, 1173 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1174 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1175 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1176 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1177 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1178 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1179 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1180 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1181 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1182 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1183 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1184 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1185 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1186 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1187 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1188 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1189 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1190 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1191 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1192 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1193 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1194 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1195 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1196 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1197 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1198 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1199 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1200 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1201 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1202 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1203 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1204 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1205 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1206 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1207 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1208 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1209 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1210 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1211 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1212 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1213 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1214 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1215 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1216 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1217 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1218 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1219 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1220 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1221 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1222 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1223 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1224 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1225 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1226 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1227 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12, 1228 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1229 
[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1230 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13, 1231 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1232 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1233 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1234 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1235 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1236 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1237 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1238 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1239 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1240 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1241 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1242 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1243 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1244 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1245 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1246 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1247 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1248 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1249 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1250 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1251 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1252 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1253 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1254 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1255 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1256 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1257 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1258 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1259 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1260 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1261 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1262 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1263 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1264 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1265 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1266 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1267 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1268 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1269 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1270 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1271 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1272 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1273 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1274 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1275 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1276 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1282 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23, 1283 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1284 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1285 [GAUDI2_QUEUE_ID_NIC_0_0] = 
CPU_ID_NIC_QMAN_ARC0, 1286 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1287 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1288 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1289 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1290 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1291 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1292 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1293 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1294 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1295 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1296 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1297 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1298 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1299 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1300 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1301 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1302 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1303 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1304 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1305 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1306 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1307 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1308 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1309 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1310 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1311 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1312 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1313 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1314 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1315 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1316 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1317 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1318 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1319 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1320 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1321 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1322 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1323 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1324 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1325 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1326 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1327 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1328 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1329 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1330 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1331 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1332 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1333 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1334 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1335 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1336 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1337 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1338 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1339 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1340 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1341 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1342 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1343 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1344 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1345 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1346 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1347 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15, 1348 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1349 
[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1350 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16, 1351 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1352 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1353 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1354 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1355 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1356 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1357 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1358 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1359 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1360 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1361 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1362 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1363 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1364 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1365 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1366 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1367 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1368 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1369 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1370 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1371 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1372 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1373 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1374 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1375 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1376 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1377 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1378 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1379 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1380 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1381 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1382 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1383 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1384 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1385 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1386 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1387 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1388 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1389 }; 1390 1391 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1392 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1393 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1394 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1395 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1396 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1397 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1398 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1399 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1400 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1401 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1402 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1403 }; 1404 1405 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1406 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1407 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1408 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1409 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1410 }; 1411 1412 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1413 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1414 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1415 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE, 1416 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1417 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE, 1418 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1419 
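	/* DCORE1-3 TPC config bases follow; note that the PCI TPC
	 * (TPC_ID_DCORE0_TPC6) is listed as the last entry of this table.
	 */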
[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1420 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE, 1421 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1422 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1423 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1424 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1425 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1426 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1427 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1428 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1429 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1430 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1431 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1432 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1433 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1434 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1435 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1436 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1437 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1438 }; 1439 1440 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1441 [ROTATOR_ID_0] = mmROT0_BASE, 1442 [ROTATOR_ID_1] = mmROT1_BASE 1443 }; 1444 1445 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1446 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1447 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1448 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1449 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1450 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1451 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1452 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1453 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1454 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1455 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1456 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1457 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0, 1458 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0, 1459 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0, 1460 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0, 1461 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0, 1462 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0, 1463 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0, 1464 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0, 1465 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0, 1466 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0, 1467 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0, 1468 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0, 1469 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0, 1470 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0, 1471 }; 1472 1473 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { 1474 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0, 1475 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, 1476 }; 1477 1478 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1479 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1480 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1481 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1482 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1483 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1484 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1485 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1486 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1487 }; 1488 1489 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1490 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1491 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1492 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1493 "gaudi2 vdec 
1_1", "gaudi2 vdec 1_1 abnormal", 1494 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal", 1495 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1496 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1497 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1498 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1499 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1500 }; 1501 1502 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = { 1503 RTR_ID_X_Y(2, 4), 1504 RTR_ID_X_Y(3, 4), 1505 RTR_ID_X_Y(4, 4), 1506 RTR_ID_X_Y(5, 4), 1507 RTR_ID_X_Y(6, 4), 1508 RTR_ID_X_Y(7, 4), 1509 RTR_ID_X_Y(8, 4), 1510 RTR_ID_X_Y(9, 4), 1511 RTR_ID_X_Y(10, 4), 1512 RTR_ID_X_Y(11, 4), 1513 RTR_ID_X_Y(12, 4), 1514 RTR_ID_X_Y(13, 4), 1515 RTR_ID_X_Y(14, 4), 1516 RTR_ID_X_Y(15, 4), 1517 RTR_ID_X_Y(16, 4), 1518 RTR_ID_X_Y(17, 4), 1519 RTR_ID_X_Y(2, 11), 1520 RTR_ID_X_Y(3, 11), 1521 RTR_ID_X_Y(4, 11), 1522 RTR_ID_X_Y(5, 11), 1523 RTR_ID_X_Y(6, 11), 1524 RTR_ID_X_Y(7, 11), 1525 RTR_ID_X_Y(8, 11), 1526 RTR_ID_X_Y(9, 11), 1527 RTR_ID_X_Y(0, 0),/* 24 no id */ 1528 RTR_ID_X_Y(0, 0),/* 25 no id */ 1529 RTR_ID_X_Y(0, 0),/* 26 no id */ 1530 RTR_ID_X_Y(0, 0),/* 27 no id */ 1531 RTR_ID_X_Y(14, 11), 1532 RTR_ID_X_Y(15, 11), 1533 RTR_ID_X_Y(16, 11), 1534 RTR_ID_X_Y(17, 11) 1535 }; 1536 1537 enum rtr_id { 1538 DCORE0_RTR0, 1539 DCORE0_RTR1, 1540 DCORE0_RTR2, 1541 DCORE0_RTR3, 1542 DCORE0_RTR4, 1543 DCORE0_RTR5, 1544 DCORE0_RTR6, 1545 DCORE0_RTR7, 1546 DCORE1_RTR0, 1547 DCORE1_RTR1, 1548 DCORE1_RTR2, 1549 DCORE1_RTR3, 1550 DCORE1_RTR4, 1551 DCORE1_RTR5, 1552 DCORE1_RTR6, 1553 DCORE1_RTR7, 1554 DCORE2_RTR0, 1555 DCORE2_RTR1, 1556 DCORE2_RTR2, 1557 DCORE2_RTR3, 1558 DCORE2_RTR4, 1559 DCORE2_RTR5, 1560 DCORE2_RTR6, 1561 DCORE2_RTR7, 1562 DCORE3_RTR0, 1563 DCORE3_RTR1, 1564 DCORE3_RTR2, 1565 DCORE3_RTR3, 1566 DCORE3_RTR4, 1567 DCORE3_RTR5, 1568 DCORE3_RTR6, 1569 DCORE3_RTR7, 1570 }; 1571 1572 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1573 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1574 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1575 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1576 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1577 DCORE0_RTR0 1578 }; 1579 1580 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { 1581 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1582 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1583 }; 1584 1585 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { 1586 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1587 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1588 }; 1589 1590 struct sft_info { 1591 u8 interface_id; 1592 u8 dcore_id; 1593 }; 1594 1595 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1596 {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3}, 1597 }; 1598 1599 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { 1600 DCORE0_RTR0, DCORE0_RTR0 1601 }; 1602 1603 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { 1604 DCORE2_RTR0, DCORE3_RTR7 1605 }; 1606 1607 struct mme_initiators_rtr_id { 1608 u32 wap0; 1609 u32 wap1; 1610 u32 write; 1611 u32 read; 1612 u32 sbte0; 1613 u32 sbte1; 1614 u32 sbte2; 1615 u32 sbte3; 1616 u32 sbte4; 1617 }; 1618 1619 enum mme_initiators { 1620 MME_WAP0 = 0, 1621 MME_WAP1, 
1622 MME_WRITE, 1623 MME_READ, 1624 MME_SBTE0, 1625 MME_SBTE1, 1626 MME_SBTE2, 1627 MME_SBTE3, 1628 MME_SBTE4, 1629 MME_INITIATORS_MAX 1630 }; 1631 1632 static const struct mme_initiators_rtr_id 1633 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1634 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1635 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1636 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1637 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1638 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1639 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1640 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1641 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1642 }; 1643 1644 enum razwi_event_sources { 1645 RAZWI_TPC, 1646 RAZWI_MME, 1647 RAZWI_EDMA, 1648 RAZWI_PDMA, 1649 RAZWI_NIC, 1650 RAZWI_DEC, 1651 RAZWI_ROT 1652 }; 1653 1654 struct hbm_mc_error_causes { 1655 u32 mask; 1656 char cause[50]; 1657 }; 1658 1659 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 1660 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 1661 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 1662 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 1663 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 1664 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 1665 }; 1666 1667 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 1668 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 1669 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 1670 [HBM_SEI_READ_ERR] = "SEI read data error", 1671 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 1672 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 1673 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 1674 [HBM_SEI_DFI] = "SEI DFI error", 1675 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 1676 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 1677 }; 1678 1679 struct mmu_spi_sei_cause { 1680 char cause[50]; 1681 int clear_bit; 1682 }; 1683 1684 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 1685 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 1686 {"page access", 1}, /* INTERRUPT_CLR[1] */ 1687 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 1688 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 1689 {"mmu rei0", -1}, /* no clear register bit */ 1690 {"mmu rei1", -1}, /* no clear register bit */ 1691 {"stlb rei0", -1}, /* no clear register bit */ 1692 {"stlb rei1", -1}, /* no clear register bit */ 1693 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 1694 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 1695 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 1696 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 1697 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1698 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1699 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1700 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1701 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 1702 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 1703 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 1704 }; 1705 1706 struct gaudi2_cache_invld_params { 1707 u64 start_va; 1708 u64 end_va; 1709 u32 inv_start_val; 1710 u32 flags; 1711 bool range_invalidation; 1712 }; 1713 1714 struct gaudi2_tpc_idle_data { 1715 struct engines_data *e; 1716 unsigned long *mask; 1717 bool *is_idle; 1718 const char *tpc_fmt; 1719 }; 1720 1721 struct gaudi2_tpc_mmu_data { 1722 u32 
rw_asid; 1723 }; 1724 1725 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0}; 1726 1727 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 1728 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 1729 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 1730 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1731 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1732 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 1733 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 1734 bool is_memset); 1735 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 1736 1737 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 1738 { 1739 1740 } 1741 1742 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 1743 { 1744 return sizeof(struct packet_msg_short); 1745 } 1746 1747 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 1748 { 1749 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 1750 } 1751 1752 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 1753 { 1754 struct asic_fixed_properties *prop = &hdev->asic_prop; 1755 int dcore, inst, tpc_seq; 1756 u32 offset; 1757 1758 /* init the return code */ 1759 ctx->rc = 0; 1760 1761 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 1762 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 1763 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 1764 1765 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 1766 continue; 1767 1768 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 1769 1770 ctx->fn(hdev, dcore, inst, offset, ctx); 1771 if (ctx->rc) { 1772 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 1773 dcore, inst); 1774 return; 1775 } 1776 } 1777 } 1778 1779 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 1780 return; 1781 1782 /* special check for PCI TPC (DCORE0_TPC6) */ 1783 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 1784 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 1785 if (ctx->rc) 1786 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 1787 } 1788 1789 static bool gaudi2_host_phys_addr_valid(u64 addr) 1790 { 1791 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 1792 return true; 1793 1794 return false; 1795 } 1796 1797 static int set_number_of_functional_hbms(struct hl_device *hdev) 1798 { 1799 struct asic_fixed_properties *prop = &hdev->asic_prop; 1800 u8 faulty_hbms = hweight64(hdev->dram_binning); 1801 1802 /* check if all HBMs should be used */ 1803 if (!faulty_hbms) { 1804 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 1805 prop->num_functional_hbms = GAUDI2_HBM_NUM; 1806 return 0; 1807 } 1808 1809 /* 1810 * check for error condition in which number of binning 1811 * candidates is higher than the maximum supported by the 1812 * driver (in which case binning mask shall be ignored and driver will 1813 * set the default) 1814 */ 1815 if (faulty_hbms > MAX_FAULTY_HBMS) { 1816 dev_err(hdev->dev, 1817 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 1818 MAX_FAULTY_HBMS, hdev->dram_binning); 1819 return -EINVAL; 1820 } 1821 1822 /* 1823 * by default, number of functional HBMs in Gaudi2 is always 1824 * GAUDI2_HBM_NUM - 1. 
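 * (More precisely, the assignment below subtracts the number of binned HBMs,
 * which the MAX_FAULTY_HBMS check above has already bounded. Illustration
 * only: num_functional_hbms later drives gaudi2_set_dram_properties(), where
 * the basic HBM page is num_functional_hbms * 8MB, i.e. 48MB with six HBMs
 * or 40MB with five, and the x16 TLB compensation factor then yields a
 * dram_page_size of 768MB or 640MB respectively.)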
 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
	return 0;
}

static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 basic_hbm_page_size;
	int rc;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return -EINVAL;

	/*
	 * Due to a HW bug in which the TLB size is x16 smaller than expected,
	 * we use a workaround in which we use a x16 bigger page size to be
	 * able to map the entire HBM in the TLB
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;

	/* Since the DRAM page size differs from the DMMU page size, we need to
	 * allocate DRAM memory in units of dram_page_size and map this memory
	 * in units of DMMU page size. We overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page.
	 * We therefore:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3.
	 *    determine the end address accordingly:
	 *    end_addr = start_addr + m * 48M
	 *
	 * the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
				(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}

static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;

	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->dmmu.last_mask =
LAST_MASK; 1961 prop->dmmu.host_resident = 1; 1962 /* TODO: will be duplicated until implementing per-MMU props */ 1963 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 1964 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1965 1966 /* 1967 * this is done in order to be able to validate FW descriptor (i.e. validating that 1968 * the addresses and allocated space for FW image does not cross memory bounds). 1969 * for this reason we set the DRAM size to the minimum possible and later it will 1970 * be modified according to what reported in the cpucp info packet 1971 */ 1972 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 1973 1974 hdev->pmmu_huge_range = true; 1975 prop->pmmu.host_resident = 1; 1976 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 1977 prop->pmmu.last_mask = LAST_MASK; 1978 /* TODO: will be duplicated until implementing per-MMU props */ 1979 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 1980 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1981 1982 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 1983 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 1984 prop->hints_host_hpage_reserved_va_range.start_addr = 1985 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 1986 prop->hints_host_hpage_reserved_va_range.end_addr = 1987 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 1988 1989 if (PAGE_SIZE == SZ_64K) { 1990 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 1991 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 1992 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 1993 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 1994 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 1995 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 1996 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 1997 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 1998 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 1999 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 2000 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2001 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2002 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2003 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2004 prop->pmmu.page_size = PAGE_SIZE_64KB; 2005 2006 /* shifts and masks are the same in PMMU and HPMMU */ 2007 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2008 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2009 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2010 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2011 } else { 2012 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2013 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2014 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2015 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2016 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2017 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2018 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2019 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2020 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2021 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2022 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2023 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2024 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2025 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2026 prop->pmmu.page_size = PAGE_SIZE_4KB; 2027 2028 /* shifts and masks are the same in PMMU and HPMMU */ 2029 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2030 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2031 
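		/* Apart from the 2MB page size set above and the VA window set
		 * below, pmmu_huge keeps everything it inherited from pmmu
		 * through the memcpy() above.
		 */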
prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2032 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2033 } 2034 2035 prop->num_engine_cores = CPU_ID_MAX; 2036 prop->cfg_size = CFG_SIZE; 2037 prop->max_asid = MAX_ASID; 2038 prop->num_of_events = GAUDI2_EVENT_SIZE; 2039 2040 prop->dc_power_default = DC_POWER_DEFAULT; 2041 2042 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2043 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2044 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2045 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2046 2047 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2048 2049 prop->mme_master_slave_mode = 1; 2050 2051 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2052 (num_sync_stream_queues * HL_RSVD_SOBS); 2053 2054 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2055 (num_sync_stream_queues * HL_RSVD_MONS); 2056 2057 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2058 2059 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2060 2061 prop->fw_cpu_boot_dev_sts0_valid = false; 2062 prop->fw_cpu_boot_dev_sts1_valid = false; 2063 prop->hard_reset_done_by_fw = false; 2064 prop->gic_interrupts_enable = true; 2065 2066 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2067 2068 prop->max_dec = NUMBER_OF_DEC; 2069 2070 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2071 2072 prop->dma_mask = 64; 2073 2074 return 0; 2075 } 2076 2077 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2078 { 2079 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2080 bool is_wc[3] = {false, false, true}; 2081 int rc; 2082 2083 rc = hl_pci_bars_map(hdev, name, is_wc); 2084 if (rc) 2085 return rc; 2086 2087 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2088 2089 return 0; 2090 } 2091 2092 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2093 { 2094 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2095 struct hl_inbound_pci_region pci_region; 2096 u64 old_addr = addr; 2097 int rc; 2098 2099 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2100 return old_addr; 2101 2102 if (hdev->asic_prop.iatu_done_by_fw) 2103 return U64_MAX; 2104 2105 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2106 pci_region.mode = PCI_BAR_MATCH_MODE; 2107 pci_region.bar = DRAM_BAR_ID; 2108 pci_region.addr = addr; 2109 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2110 if (rc) 2111 return U64_MAX; 2112 2113 if (gaudi2) { 2114 old_addr = gaudi2->dram_bar_cur_addr; 2115 gaudi2->dram_bar_cur_addr = addr; 2116 } 2117 2118 return old_addr; 2119 } 2120 2121 static int gaudi2_init_iatu(struct hl_device *hdev) 2122 { 2123 struct hl_inbound_pci_region inbound_region; 2124 struct hl_outbound_pci_region outbound_region; 2125 u32 bar_addr_low, bar_addr_high; 2126 int rc; 2127 2128 if (hdev->asic_prop.iatu_done_by_fw) 2129 return 0; 2130 2131 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2132 * We must map this region in BAR match mode in order to 2133 * fetch BAR physical base address 2134 */ 2135 inbound_region.mode = PCI_BAR_MATCH_MODE; 2136 inbound_region.bar = SRAM_CFG_BAR_ID; 2137 /* Base address must be aligned to Bar size which is 256 MB */ 2138 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 2139 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2140 if (rc) 2141 return rc; 2142 2143 /* Fetch physical BAR address */ 2144 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2145 
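	/* The low BAR dword read below also carries the standard PCI BAR flag
	 * bits in its lowest nibble, hence the ~0xF mask before it is combined
	 * with the high dword into the 64-bit BAR physical address.
	 */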
bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2146 2147 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2148 2149 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2150 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2151 inbound_region.bar = SRAM_CFG_BAR_ID; 2152 inbound_region.offset_in_bar = 0; 2153 inbound_region.addr = STM_FLASH_BASE_ADDR; 2154 inbound_region.size = CFG_REGION_SIZE; 2155 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2156 if (rc) 2157 return rc; 2158 2159 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2160 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2161 inbound_region.bar = SRAM_CFG_BAR_ID; 2162 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2163 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2164 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2165 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2166 if (rc) 2167 return rc; 2168 2169 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2170 inbound_region.mode = PCI_BAR_MATCH_MODE; 2171 inbound_region.bar = DRAM_BAR_ID; 2172 inbound_region.addr = DRAM_PHYS_BASE; 2173 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2174 if (rc) 2175 return rc; 2176 2177 /* Outbound Region 0 - Point to Host */ 2178 outbound_region.addr = HOST_PHYS_BASE_0; 2179 outbound_region.size = HOST_PHYS_SIZE_0; 2180 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2181 2182 return rc; 2183 } 2184 2185 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2186 { 2187 return RREG32(mmHW_STATE); 2188 } 2189 2190 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2191 { 2192 struct asic_fixed_properties *prop = &hdev->asic_prop; 2193 2194 /* 2195 * check for error condition in which number of binning candidates 2196 * is higher than the maximum supported by the driver 2197 */ 2198 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2199 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2200 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2201 hdev->tpc_binning); 2202 return -EINVAL; 2203 } 2204 2205 prop->tpc_binning_mask = hdev->tpc_binning; 2206 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2207 2208 return 0; 2209 } 2210 2211 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2212 { 2213 struct asic_fixed_properties *prop = &hdev->asic_prop; 2214 struct hw_queue_properties *q_props = prop->hw_queues_props; 2215 u64 tpc_binning_mask; 2216 u8 subst_idx = 0; 2217 int i, rc; 2218 2219 rc = gaudi2_tpc_binning_init_prop(hdev); 2220 if (rc) 2221 return rc; 2222 2223 tpc_binning_mask = prop->tpc_binning_mask; 2224 2225 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2226 u8 subst_seq, binned, qid_base; 2227 2228 if (tpc_binning_mask == 0) 2229 break; 2230 2231 if (subst_idx == 0) { 2232 subst_seq = TPC_ID_DCORE0_TPC6; 2233 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2234 } else { 2235 subst_seq = TPC_ID_DCORE3_TPC5; 2236 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2237 } 2238 2239 2240 /* clear bit from mask */ 2241 binned = __ffs(tpc_binning_mask); 2242 /* 2243 * Coverity complains about possible out-of-bound access in 2244 * clear_bit 2245 */ 2246 if (binned >= TPC_ID_SIZE) { 2247 dev_err(hdev->dev, 2248 "Invalid binned TPC (binning mask: %llx)\n", 2249 tpc_binning_mask); 2250 return -EINVAL; 2251 } 2252 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2253 2254 /* also clear replacing TPC bit from enabled mask */ 2255 clear_bit(subst_seq, (unsigned 
long *)&prop->tpc_enabled_mask); 2256 2257 /* bin substite TPC's Qs */ 2258 q_props[qid_base].binned = 1; 2259 q_props[qid_base + 1].binned = 1; 2260 q_props[qid_base + 2].binned = 1; 2261 q_props[qid_base + 3].binned = 1; 2262 2263 subst_idx++; 2264 } 2265 2266 return 0; 2267 } 2268 2269 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev) 2270 { 2271 struct asic_fixed_properties *prop = &hdev->asic_prop; 2272 u8 num_faulty; 2273 2274 num_faulty = hweight32(hdev->decoder_binning); 2275 2276 /* 2277 * check for error condition in which number of binning candidates 2278 * is higher than the maximum supported by the driver 2279 */ 2280 if (num_faulty > MAX_FAULTY_DECODERS) { 2281 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n", 2282 hdev->decoder_binning); 2283 return -EINVAL; 2284 } 2285 2286 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK); 2287 2288 if (prop->decoder_binning_mask) 2289 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1)); 2290 else 2291 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK; 2292 2293 return 0; 2294 } 2295 2296 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev) 2297 { 2298 struct asic_fixed_properties *prop = &hdev->asic_prop; 2299 2300 /* check if we should override default binning */ 2301 if (!hdev->dram_binning) { 2302 prop->dram_binning_mask = 0; 2303 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK; 2304 return; 2305 } 2306 2307 /* set DRAM binning constraints */ 2308 prop->faulty_dram_cluster_map |= hdev->dram_binning; 2309 prop->dram_binning_mask = hdev->dram_binning; 2310 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5); 2311 } 2312 2313 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev) 2314 { 2315 struct asic_fixed_properties *prop = &hdev->asic_prop; 2316 struct hw_queue_properties *q_props; 2317 u8 seq, num_faulty; 2318 2319 num_faulty = hweight32(hdev->edma_binning); 2320 2321 /* 2322 * check for error condition in which number of binning candidates 2323 * is higher than the maximum supported by the driver 2324 */ 2325 if (num_faulty > MAX_FAULTY_EDMAS) { 2326 dev_err(hdev->dev, 2327 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n", 2328 hdev->edma_binning); 2329 return -EINVAL; 2330 } 2331 2332 if (!hdev->edma_binning) { 2333 prop->edma_binning_mask = 0; 2334 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK; 2335 return 0; 2336 } 2337 2338 seq = __ffs((unsigned long)hdev->edma_binning); 2339 2340 /* set binning constraints */ 2341 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]); 2342 prop->edma_binning_mask = hdev->edma_binning; 2343 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1); 2344 2345 /* bin substitute EDMA's queue */ 2346 q_props = prop->hw_queues_props; 2347 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1; 2348 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1; 2349 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1; 2350 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1; 2351 2352 return 0; 2353 } 2354 2355 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask) 2356 { 2357 struct asic_fixed_properties *prop = &hdev->asic_prop; 2358 u8 num_faulty, seq; 2359 2360 /* check if we should override default binning */ 2361 if (!xbar_edge_iso_mask) { 2362 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK; 2363 return 0; 2364 } 2365 2366 /* 2367 
* note that it can be set to value other than 0 only after cpucp packet (i.e. 2368 * only the FW can set a redundancy value). for user it'll always be 0. 2369 */ 2370 num_faulty = hweight32(xbar_edge_iso_mask); 2371 2372 /* 2373 * check for error condition in which number of binning candidates 2374 * is higher than the maximum supported by the driver 2375 */ 2376 if (num_faulty > MAX_FAULTY_XBARS) { 2377 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n", 2378 MAX_FAULTY_XBARS); 2379 return -EINVAL; 2380 } 2381 2382 seq = __ffs((unsigned long)xbar_edge_iso_mask); 2383 2384 /* set binning constraints */ 2385 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]); 2386 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK; 2387 2388 return 0; 2389 } 2390 2391 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask) 2392 { 2393 int rc; 2394 2395 /* 2396 * mark all clusters as good, each component will "fail" cluster 2397 * based on eFuse/user values. 2398 * If more than single cluster is faulty- the chip is unusable 2399 */ 2400 hdev->asic_prop.faulty_dram_cluster_map = 0; 2401 2402 gaudi2_set_dram_binning_masks(hdev); 2403 2404 rc = gaudi2_set_edma_binning_masks(hdev); 2405 if (rc) 2406 return rc; 2407 2408 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask); 2409 if (rc) 2410 return rc; 2411 2412 2413 /* always initially set to full mask */ 2414 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK; 2415 2416 return 0; 2417 } 2418 2419 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev) 2420 { 2421 struct asic_fixed_properties *prop = &hdev->asic_prop; 2422 int rc; 2423 2424 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask); 2425 if (rc) 2426 return rc; 2427 2428 /* if we have DRAM binning reported by FW we should perform cluster config */ 2429 if (prop->faulty_dram_cluster_map) { 2430 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map); 2431 2432 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq]; 2433 } 2434 2435 return 0; 2436 } 2437 2438 static int gaudi2_cpucp_info_get(struct hl_device *hdev) 2439 { 2440 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2441 struct asic_fixed_properties *prop = &hdev->asic_prop; 2442 long max_power; 2443 u64 dram_size; 2444 int rc; 2445 2446 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2447 return 0; 2448 2449 /* No point of asking this information again when not doing hard reset, as the device 2450 * CPU hasn't been reset 2451 */ 2452 if (hdev->reset_info.in_compute_reset) 2453 return 0; 2454 2455 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 2456 mmCPU_BOOT_ERR1); 2457 if (rc) 2458 return rc; 2459 2460 dram_size = le64_to_cpu(prop->cpucp_info.dram_size); 2461 if (dram_size) { 2462 /* we can have wither 5 or 6 HBMs. other values are invalid */ 2463 2464 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2465 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2466 dev_err(hdev->dev, 2467 "F/W reported invalid DRAM size %llu. 
Trying to use default size %llu\n", 2468 dram_size, prop->dram_size); 2469 dram_size = prop->dram_size; 2470 } 2471 2472 prop->dram_size = dram_size; 2473 prop->dram_end_address = prop->dram_base_address + dram_size; 2474 } 2475 2476 if (!strlen(prop->cpucp_info.card_name)) 2477 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2478 2479 /* Overwrite binning masks with the actual binning values from F/W */ 2480 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2481 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2482 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2483 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2484 2485 /* 2486 * at this point the DRAM parameters need to be updated according to data obtained 2487 * from the FW 2488 */ 2489 rc = hdev->asic_funcs->set_dram_properties(hdev); 2490 if (rc) 2491 return rc; 2492 2493 rc = gaudi2_set_cluster_binning_masks(hdev); 2494 if (rc) 2495 return rc; 2496 2497 rc = gaudi2_set_tpc_binning_masks(hdev); 2498 if (rc) 2499 return rc; 2500 2501 rc = gaudi2_set_dec_binning_masks(hdev); 2502 if (rc) 2503 return rc; 2504 2505 max_power = hl_fw_get_max_power(hdev); 2506 if (max_power < 0) 2507 return max_power; 2508 2509 prop->max_power_default = (u64) max_power; 2510 2511 return 0; 2512 } 2513 2514 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2515 { 2516 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2517 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2518 int rc; 2519 2520 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2521 return 0; 2522 2523 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2524 if (rc) 2525 return rc; 2526 2527 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2528 2529 return 0; 2530 } 2531 2532 static int gaudi2_early_init(struct hl_device *hdev) 2533 { 2534 struct asic_fixed_properties *prop = &hdev->asic_prop; 2535 struct pci_dev *pdev = hdev->pdev; 2536 resource_size_t pci_bar_size; 2537 int rc; 2538 2539 rc = gaudi2_set_fixed_properties(hdev); 2540 if (rc) 2541 return rc; 2542 2543 /* Check BAR sizes */ 2544 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2545 2546 if (pci_bar_size != CFG_BAR_SIZE) { 2547 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2548 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2549 rc = -ENODEV; 2550 goto free_queue_props; 2551 } 2552 2553 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2554 if (pci_bar_size != MSIX_BAR_SIZE) { 2555 dev_err(hdev->dev, "Not " HL_NAME "? 
BAR %d size %pa, expecting %llu\n", 2556 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2557 rc = -ENODEV; 2558 goto free_queue_props; 2559 } 2560 2561 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2562 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2563 2564 /* 2565 * Only in pldm driver config iATU 2566 */ 2567 if (hdev->pldm) 2568 hdev->asic_prop.iatu_done_by_fw = false; 2569 else 2570 hdev->asic_prop.iatu_done_by_fw = true; 2571 2572 rc = hl_pci_init(hdev); 2573 if (rc) 2574 goto free_queue_props; 2575 2576 /* Before continuing in the initialization, we need to read the preboot 2577 * version to determine whether we run with a security-enabled firmware 2578 */ 2579 rc = hl_fw_read_preboot_status(hdev); 2580 if (rc) { 2581 if (hdev->reset_on_preboot_fail) 2582 hdev->asic_funcs->hw_fini(hdev, true, false); 2583 goto pci_fini; 2584 } 2585 2586 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2587 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2588 hdev->asic_funcs->hw_fini(hdev, true, false); 2589 } 2590 2591 return 0; 2592 2593 pci_fini: 2594 hl_pci_fini(hdev); 2595 free_queue_props: 2596 kfree(hdev->asic_prop.hw_queues_props); 2597 return rc; 2598 } 2599 2600 static int gaudi2_early_fini(struct hl_device *hdev) 2601 { 2602 kfree(hdev->asic_prop.hw_queues_props); 2603 hl_pci_fini(hdev); 2604 2605 return 0; 2606 } 2607 2608 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 2609 { 2610 switch (arc_id) { 2611 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 2612 return true; 2613 default: 2614 return false; 2615 } 2616 } 2617 2618 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 2619 { 2620 switch (arc_id) { 2621 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 2622 return true; 2623 default: 2624 return false; 2625 } 2626 } 2627 2628 static void gaudi2_init_arcs(struct hl_device *hdev) 2629 { 2630 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2631 u64 arc_id; 2632 u32 i; 2633 2634 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 2635 if (gaudi2_is_arc_enabled(hdev, i)) 2636 continue; 2637 2638 gaudi2_set_arc_id_cap(hdev, i); 2639 } 2640 2641 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 2642 if (!gaudi2_is_queue_enabled(hdev, i)) 2643 continue; 2644 2645 arc_id = gaudi2_queue_id_to_arc_id[i]; 2646 if (gaudi2_is_arc_enabled(hdev, arc_id)) 2647 continue; 2648 2649 if (gaudi2_is_arc_nic_owned(arc_id) && 2650 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 2651 continue; 2652 2653 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 2654 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 2655 continue; 2656 2657 gaudi2_set_arc_id_cap(hdev, arc_id); 2658 } 2659 } 2660 2661 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 2662 { 2663 u32 reg_base, reg_val; 2664 int rc; 2665 2666 switch (cpu_id) { 2667 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 2668 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 2669 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2670 ARC_DCCM_BLOCK_SIZE * 2, true); 2671 if (rc) 2672 return rc; 2673 break; 2674 case CPU_ID_SCHED_ARC4: 2675 case CPU_ID_SCHED_ARC5: 2676 case CPU_ID_MME_QMAN_ARC0: 2677 case CPU_ID_MME_QMAN_ARC1: 2678 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 2679 2680 /* Scrub lower DCCM block */ 2681 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2682 ARC_DCCM_BLOCK_SIZE, true); 2683 if (rc) 2684 return rc; 2685 2686 /* Switch to upper DCCM block */ 2687 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 2688 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2689 2690 /* Scrub upper DCCM block */ 2691 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2692 ARC_DCCM_BLOCK_SIZE, true); 2693 if (rc) 2694 return rc; 2695 2696 /* Switch to lower DCCM block */ 2697 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 2698 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2699 break; 2700 default: 2701 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2702 ARC_DCCM_BLOCK_SIZE, true); 2703 if (rc) 2704 return rc; 2705 } 2706 2707 return 0; 2708 } 2709 2710 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 2711 { 2712 u16 arc_id; 2713 2714 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 2715 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 2716 continue; 2717 2718 gaudi2_scrub_arc_dccm(hdev, arc_id); 2719 } 2720 } 2721 2722 static int gaudi2_late_init(struct hl_device *hdev) 2723 { 2724 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2725 int rc; 2726 2727 hdev->asic_prop.supports_advanced_cpucp_rc = true; 2728 2729 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 2730 gaudi2->virt_msix_db_dma_addr); 2731 if (rc) { 2732 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 2733 return rc; 2734 } 2735 2736 rc = gaudi2_fetch_psoc_frequency(hdev); 2737 if (rc) { 2738 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 2739 goto disable_pci_access; 2740 } 2741 2742 gaudi2_init_arcs(hdev); 2743 gaudi2_scrub_arcs_dccm(hdev); 2744 gaudi2_init_security(hdev); 2745 2746 return 0; 2747 2748 disable_pci_access: 2749 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 2750 2751 return rc; 2752 } 2753 2754 static void gaudi2_late_fini(struct hl_device *hdev) 2755 { 2756 hl_hwmon_release_resources(hdev); 2757 } 2758 2759 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 2760 { 2761 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2762 2763 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2764 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2765 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2766 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2767 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2768 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2769 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2770 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2771 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2772 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2773 } 2774 2775 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 2776 { 2777 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2778 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2779 u32 block_size, umr_start_idx, num_umr_blocks; 2780 int i; 2781 2782 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 2783 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 2784 block_size = ARC_DCCM_BLOCK_SIZE * 2; 2785 else 2786 block_size = ARC_DCCM_BLOCK_SIZE; 2787 2788 blocks[i].address = gaudi2_arc_dccm_bases[i]; 2789 blocks[i].size = block_size; 2790 } 2791 2792 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 2793 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 2794 2795 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 2796 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 2797 2798 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 2799 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 2800 2801 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 2802 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 2803 2804 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 2805 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 2806 2807 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 2808 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 2809 2810 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 2811 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 2812 2813 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 2814 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 2815 2816 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 2817 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 2818 for (i = 0 ; i < num_umr_blocks ; i++) { 2819 u8 nic_id, umr_block_id; 2820 2821 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 2822 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 2823 2824 blocks[umr_start_idx + i].address = 2825 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 2826 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 2827 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 2828 umr_block_id * NIC_UMR_OFFSET; 2829 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 2830 } 2831 2832 /* Expose decoder HW configuration block to user */ 2833 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 2834 2835 for (i = 1; i < NUM_OF_DCORES; ++i) { 2836 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 2837 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 2838 2839 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 2840 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 2841 2842 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 2843 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 2844 } 2845 } 2846 2847 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 2848 { 2849 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 2850 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 2851 int i, j, rc = 0; 2852 2853 /* The device ARC works with 32-bits addresses, and because there is a single HW register 2854 * that holds the extension bits (49..28), these bits must be identical in all the allocated 2855 * range. 
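 * Put differently (illustration, not an extra requirement): the buffer must not
 * cross a bit-28 aligned (256MB) boundary. The loop below therefore retries the
 * allocation up to GAUDI2_ALLOC_CPU_MEM_RETRY_CNT times until the MSBs of the
 * first and last byte match, and frees the rejected attempts at the end.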
	 */

	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of ARC accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = DRAM_BAR_ID;
	region->used = 1;
}

static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i, j, k;

	/* Initialize common user CQ interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
				HL_COMMON_USER_CQ_INTERRUPT_ID, false);

	/* Initialize common decoder interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
				HL_COMMON_DEC_INTERRUPT_ID, true);

	/* User interrupts structure holds both decoder and user interrupts from various engines.
	 * We first initialize the decoder interrupts and then we add the user interrupts.
	 * The only limitation is that the last decoder interrupt id must be smaller
	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2939 */ 2940 2941 /* Initialize decoder interrupts, expose only normal interrupts, 2942 * error interrupts to be handled by driver 2943 */ 2944 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 2945 i += 2, j++) 2946 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true); 2947 2948 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 2949 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false); 2950 } 2951 2952 static inline int gaudi2_get_non_zero_random_int(void) 2953 { 2954 int rand = get_random_u32(); 2955 2956 return rand ? rand : 1; 2957 } 2958 2959 static int gaudi2_sw_init(struct hl_device *hdev) 2960 { 2961 struct asic_fixed_properties *prop = &hdev->asic_prop; 2962 struct gaudi2_device *gaudi2; 2963 int i, rc; 2964 2965 /* Allocate device structure */ 2966 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 2967 if (!gaudi2) 2968 return -ENOMEM; 2969 2970 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 2971 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 2972 continue; 2973 2974 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 2975 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 2976 GAUDI2_EVENT_SIZE); 2977 rc = -EINVAL; 2978 goto free_gaudi2_device; 2979 } 2980 2981 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 2982 } 2983 2984 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 2985 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 2986 2987 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 2988 2989 hdev->asic_specific = gaudi2; 2990 2991 /* Create DMA pool for small allocations. 2992 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 2993 * PI/CI registers allocated from this pool have this restriction 2994 */ 2995 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 2996 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 2997 if (!hdev->dma_pool) { 2998 dev_err(hdev->dev, "failed to create DMA pool\n"); 2999 rc = -ENOMEM; 3000 goto free_gaudi2_device; 3001 } 3002 3003 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3004 if (rc) 3005 goto free_dma_pool; 3006 3007 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3008 if (!hdev->cpu_accessible_dma_pool) { 3009 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3010 rc = -ENOMEM; 3011 goto free_cpu_dma_mem; 3012 } 3013 3014 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3015 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3016 if (rc) { 3017 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3018 rc = -EFAULT; 3019 goto free_cpu_accessible_dma_pool; 3020 } 3021 3022 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3023 &gaudi2->virt_msix_db_dma_addr); 3024 if (!gaudi2->virt_msix_db_cpu_addr) { 3025 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3026 rc = -ENOMEM; 3027 goto free_cpu_accessible_dma_pool; 3028 } 3029 3030 spin_lock_init(&gaudi2->hw_queues_lock); 3031 3032 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3033 &gaudi2->scratchpad_bus_address, 3034 GFP_KERNEL | __GFP_ZERO); 3035 if (!gaudi2->scratchpad_kernel_address) { 3036 rc = -ENOMEM; 3037 goto free_virt_msix_db_mem; 3038 } 3039 3040 gaudi2_user_mapped_blocks_init(hdev); 3041 3042 /* Initialize user interrupts */ 3043 
	gaudi2_user_interrupt_setup(hdev);

	hdev->supports_coresight = true;
	hdev->supports_sync_stream = true;
	hdev->supports_cb_mapping = true;
	hdev->supports_wait_for_multi_cs = false;

	prop->supports_compute_reset = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	return 0;

free_virt_msix_db_mem:
	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi2_device:
	kfree(gaudi2);
	return rc;
}

static int gaudi2_sw_fini(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
					gaudi2->scratchpad_bus_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi2);

	return 0;
}

static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
						QM_GLBL_CFG1_CQF_STOP |
						QM_GLBL_CFG1_CP_STOP);

	/* stop also the ARC */
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
}

static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
						QM_GLBL_CFG1_CQF_FLUSH |
						QM_GLBL_CFG1_CP_FLUSH);
}

static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
}

/**
 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
 *
 * @hdev: pointer to the habanalabs device structure
 * @queue_id: queue whose fence counters should be cleared
 * @skip_fence: if true, set all fence counters to the maximum value so the QM
 *              cannot get stuck on any fence value. Otherwise set all fence
 *              counters to 0 (standard clear of fence counters).
 */
static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
						bool skip_fence)
{
	u32 size, reg_base;
	u32 addr, val;

	reg_base = gaudi2_qm_blocks_bases[queue_id];

	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;

	/*
	 * In case we want to make sure that a QM which is stuck on a fence will
	 * be released, we should set the fence counter to a value higher than
	 * the value the QM is waiting for. To comply with a fence counter of
	 * any value, we set the maximum fence value to all counters.
	 */
	val = skip_fence ?
U32_MAX : 0; 3141 gaudi2_memset_device_lbw(hdev, addr, size, val); 3142 } 3143 3144 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3145 { 3146 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3147 3148 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3149 gaudi2_flush_qman_common(hdev, reg_base); 3150 gaudi2_flush_qman_arc_common(hdev, reg_base); 3151 } 3152 3153 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3154 { 3155 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3156 int dcore, inst; 3157 3158 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3159 goto stop_edma_qmans; 3160 3161 /* Stop CPs of PDMA QMANs */ 3162 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3163 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3164 3165 stop_edma_qmans: 3166 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3167 return; 3168 3169 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3170 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3171 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3172 u32 qm_base; 3173 3174 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3175 continue; 3176 3177 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3178 inst * DCORE_EDMA_OFFSET; 3179 3180 /* Stop CPs of EDMA QMANs */ 3181 gaudi2_stop_qman_common(hdev, qm_base); 3182 } 3183 } 3184 } 3185 3186 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3187 { 3188 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3189 u32 offset, i; 3190 3191 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3192 3193 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3194 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3195 continue; 3196 3197 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3198 } 3199 } 3200 3201 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3202 { 3203 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3204 u32 reg_base; 3205 int i; 3206 3207 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3208 return; 3209 3210 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3211 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3212 continue; 3213 3214 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3215 gaudi2_stop_qman_common(hdev, reg_base); 3216 } 3217 } 3218 3219 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3220 { 3221 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3222 u32 reg_base; 3223 int i; 3224 3225 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3226 return; 3227 3228 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3229 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3230 continue; 3231 3232 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3233 gaudi2_stop_qman_common(hdev, reg_base); 3234 } 3235 } 3236 3237 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3238 { 3239 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3240 u32 reg_base, queue_id; 3241 int i; 3242 3243 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3244 return; 3245 3246 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3247 3248 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3249 if (!(hdev->nic_ports_mask & BIT(i))) 3250 continue; 3251 3252 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3253 gaudi2_stop_qman_common(hdev, reg_base); 3254 } 3255 } 3256 3257 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3258 { 3259 u32 reg_val; 3260 3261 reg_val = 
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3262 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3263 } 3264 3265 static void gaudi2_dma_stall(struct hl_device *hdev) 3266 { 3267 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3268 int dcore, inst; 3269 3270 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3271 goto stall_edma; 3272 3273 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3274 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3275 3276 stall_edma: 3277 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3278 return; 3279 3280 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3281 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3282 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3283 u32 core_base; 3284 3285 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3286 continue; 3287 3288 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3289 inst * DCORE_EDMA_OFFSET; 3290 3291 /* Stall CPs of EDMA QMANs */ 3292 gaudi2_stall_dma_common(hdev, core_base); 3293 } 3294 } 3295 } 3296 3297 static void gaudi2_mme_stall(struct hl_device *hdev) 3298 { 3299 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3300 u32 offset, i; 3301 3302 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3303 3304 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3305 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3306 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3307 } 3308 3309 static void gaudi2_tpc_stall(struct hl_device *hdev) 3310 { 3311 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3312 u32 reg_base; 3313 int i; 3314 3315 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3316 return; 3317 3318 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3319 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3320 continue; 3321 3322 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3323 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3324 } 3325 } 3326 3327 static void gaudi2_rotator_stall(struct hl_device *hdev) 3328 { 3329 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3330 u32 reg_val; 3331 int i; 3332 3333 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3334 return; 3335 3336 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3337 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3338 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3339 3340 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3341 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3342 continue; 3343 3344 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3345 } 3346 } 3347 3348 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3349 { 3350 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3351 } 3352 3353 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3354 { 3355 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3356 int dcore, inst; 3357 3358 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3359 goto stop_edma_qmans; 3360 3361 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3362 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3363 3364 stop_edma_qmans: 3365 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3366 return; 3367 3368 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3369 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3370 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3371 u32 qm_base; 3372 3373 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3374 continue; 3375 3376 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3377 inst * 
DCORE_EDMA_OFFSET; 3378 3379 /* Disable CPs of EDMA QMANs */ 3380 gaudi2_disable_qman_common(hdev, qm_base); 3381 } 3382 } 3383 } 3384 3385 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3386 { 3387 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3388 u32 offset, i; 3389 3390 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3391 3392 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3393 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3394 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3395 } 3396 3397 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3398 { 3399 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3400 u32 reg_base; 3401 int i; 3402 3403 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3404 return; 3405 3406 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3407 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3408 continue; 3409 3410 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3411 gaudi2_disable_qman_common(hdev, reg_base); 3412 } 3413 } 3414 3415 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 3416 { 3417 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3418 u32 reg_base; 3419 int i; 3420 3421 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3422 return; 3423 3424 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3425 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3426 continue; 3427 3428 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3429 gaudi2_disable_qman_common(hdev, reg_base); 3430 } 3431 } 3432 3433 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 3434 { 3435 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3436 u32 reg_base, queue_id; 3437 int i; 3438 3439 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3440 return; 3441 3442 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3443 3444 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3445 if (!(hdev->nic_ports_mask & BIT(i))) 3446 continue; 3447 3448 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3449 gaudi2_disable_qman_common(hdev, reg_base); 3450 } 3451 } 3452 3453 static void gaudi2_enable_timestamp(struct hl_device *hdev) 3454 { 3455 /* Disable the timestamp counter */ 3456 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3457 3458 /* Zero the lower/upper parts of the 64-bit counter */ 3459 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 3460 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 3461 3462 /* Enable the counter */ 3463 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 3464 } 3465 3466 static void gaudi2_disable_timestamp(struct hl_device *hdev) 3467 { 3468 /* Disable the timestamp counter */ 3469 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3470 } 3471 3472 static const char *gaudi2_irq_name(u16 irq_number) 3473 { 3474 switch (irq_number) { 3475 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 3476 return "gaudi2 cpu eq"; 3477 case GAUDI2_IRQ_NUM_COMPLETION: 3478 return "gaudi2 completion"; 3479 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 3480 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 3481 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 3482 return "gaudi2 user completion"; 3483 default: 3484 return "invalid"; 3485 } 3486 } 3487 3488 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 3489 { 3490 int i, irq, relative_idx; 3491 struct hl_dec *dec; 3492 3493 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 3494 irq = pci_irq_vector(hdev->pdev, i); 3495 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3496 3497 dec = hdev->dec + relative_idx / 2; 3498 3499 /* We pass different structures depending on the irq handler. For the abnormal 3500 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3501 * user_interrupt entry 3502 */ 3503 free_irq(irq, ((relative_idx % 2) ? 3504 (void *) dec : 3505 (void *) &hdev->user_interrupt[dec->core_id])); 3506 } 3507 } 3508 3509 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 3510 { 3511 int rc, i, irq_init_cnt, irq, relative_idx; 3512 irq_handler_t irq_handler; 3513 struct hl_dec *dec; 3514 3515 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 3516 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 3517 i++, irq_init_cnt++) { 3518 3519 irq = pci_irq_vector(hdev->pdev, i); 3520 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3521 3522 irq_handler = (relative_idx % 2) ? 3523 hl_irq_handler_dec_abnrm : 3524 hl_irq_handler_user_interrupt; 3525 3526 dec = hdev->dec + relative_idx / 2; 3527 3528 /* We pass different structures depending on the irq handler. For the abnormal 3529 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3530 * user_interrupt entry 3531 */ 3532 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), 3533 ((relative_idx % 2) ? 3534 (void *) dec : 3535 (void *) &hdev->user_interrupt[dec->core_id])); 3536 if (rc) { 3537 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3538 goto free_dec_irqs; 3539 } 3540 } 3541 3542 return 0; 3543 3544 free_dec_irqs: 3545 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 3546 return rc; 3547 } 3548 3549 static int gaudi2_enable_msix(struct hl_device *hdev) 3550 { 3551 struct asic_fixed_properties *prop = &hdev->asic_prop; 3552 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3553 int rc, irq, i, j, user_irq_init_cnt; 3554 irq_handler_t irq_handler; 3555 struct hl_cq *cq; 3556 3557 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 3558 return 0; 3559 3560 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 3561 PCI_IRQ_MSIX); 3562 if (rc < 0) { 3563 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 3564 GAUDI2_MSIX_ENTRIES, rc); 3565 return rc; 3566 } 3567 3568 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3569 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3570 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 3571 if (rc) { 3572 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3573 goto free_irq_vectors; 3574 } 3575 3576 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3577 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 3578 &hdev->event_queue); 3579 if (rc) { 3580 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3581 goto free_completion_irq; 3582 } 3583 3584 rc = gaudi2_dec_enable_msix(hdev); 3585 if (rc) { 3586 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 3587 goto free_event_irq; 3588 } 3589 3590 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 
3591 user_irq_init_cnt < prop->user_interrupt_count; 3592 i++, j++, user_irq_init_cnt++) { 3593 3594 irq = pci_irq_vector(hdev->pdev, i); 3595 irq_handler = hl_irq_handler_user_interrupt; 3596 3597 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); 3598 if (rc) { 3599 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3600 goto free_user_irq; 3601 } 3602 } 3603 3604 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 3605 3606 return 0; 3607 3608 free_user_irq: 3609 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 3610 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 3611 3612 irq = pci_irq_vector(hdev->pdev, i); 3613 free_irq(irq, &hdev->user_interrupt[j]); 3614 } 3615 3616 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3617 3618 free_event_irq: 3619 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3620 free_irq(irq, cq); 3621 3622 free_completion_irq: 3623 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3624 free_irq(irq, cq); 3625 3626 free_irq_vectors: 3627 pci_free_irq_vectors(hdev->pdev); 3628 3629 return rc; 3630 } 3631 3632 static void gaudi2_sync_irqs(struct hl_device *hdev) 3633 { 3634 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3635 int i, j; 3636 int irq; 3637 3638 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3639 return; 3640 3641 /* Wait for all pending IRQs to be finished */ 3642 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 3643 3644 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 3645 irq = pci_irq_vector(hdev->pdev, i); 3646 synchronize_irq(irq); 3647 } 3648 3649 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 3650 i++, j++) { 3651 irq = pci_irq_vector(hdev->pdev, i); 3652 synchronize_irq(irq); 3653 } 3654 3655 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 3656 } 3657 3658 static void gaudi2_disable_msix(struct hl_device *hdev) 3659 { 3660 struct asic_fixed_properties *prop = &hdev->asic_prop; 3661 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3662 struct hl_cq *cq; 3663 int irq, i, j, k; 3664 3665 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3666 return; 3667 3668 gaudi2_sync_irqs(hdev); 3669 3670 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3671 free_irq(irq, &hdev->event_queue); 3672 3673 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3674 3675 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 3676 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 3677 3678 irq = pci_irq_vector(hdev->pdev, i); 3679 free_irq(irq, &hdev->user_interrupt[j]); 3680 } 3681 3682 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3683 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3684 free_irq(irq, cq); 3685 3686 pci_free_irq_vectors(hdev->pdev); 3687 3688 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 3689 } 3690 3691 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 3692 { 3693 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3694 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3695 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3696 int rc; 3697 3698 if (hdev->pldm) 3699 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3700 else 3701 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3702 3703 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3704 dec_bit = dcore_id 
				* NUM_OF_DEC_PER_DCORE + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;

		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);

		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from the decoder stops
		 * before applying core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from DCORE%d Decoder %d\n",
				dcore_id, dec_id);
	}
}

static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dec_id * PCIE_VDEC_OFFSET;

		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);

		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from the decoder stops
		 * before applying core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from PCIe Decoder %d\n",
				dec_id);
	}
}

static void gaudi2_stop_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore_id;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		gaudi2_stop_dcore_dec(hdev, dcore_id);

	gaudi2_stop_pcie_dec(hdev);
}

static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	u32 reg_base, reg_val;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
	else
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);

	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
}

static void gaudi2_halt_arcs(struct hl_device *hdev)
{
	u16 arc_id;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
	}
}

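/* Descriptive note (added): poll the ARC AUX RUN/HALT ack register until the
 * mode requested by gaudi2_set_arc_running_mode() is acknowledged, and clear
 * the request on success. Returns 0 on success or an error code if the ack is
 * not seen within the timeout.
 */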
static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	int rc;
	u32 reg_base, val, ack_mask, timeout_usec = 100000;

	if (hdev->pldm)
		timeout_usec *= 100;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
	else
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;

	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
				val, ((val & ack_mask) == ack_mask),
				1000, timeout_usec);

	if (!rc) {
		/* Clear */
		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
	}

	return rc;
}

static void gaudi2_reset_arcs(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u16 arc_id;

	if (!gaudi2)
		return;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_clr_arc_id_cap(hdev, arc_id);
}

static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 queue_id;
	int i;

	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
		return;

	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		gaudi2_qman_manual_flush_common(hdev, queue_id);
	}
}

static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
					u32 num_cores, u32 core_command)
{
	int i, rc;

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
	}

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);

			if (rc) {
				dev_err(hdev->dev, "failed to %s arc: %d\n",
					(core_command == HL_ENGINE_CORE_HALT) ?
					"HALT" : "RUN", core_ids[i]);
				return -1;
			}
		}
	}

	return 0;
}

static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * In case of soft reset, do a manual flush for QMANs (currently called
	 * only for NIC QMANs).
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}

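/* Descriptive note (added): fill the register addresses that the common
 * firmware loader polls during the preboot stage, i.e. before any dynamic
 * descriptor has been received from the firmware.
 */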
static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
}

static void gaudi2_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DRAM_BAR_ID;
	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;

	/* Here we update initial values for a few specific dynamic regs (as
	 * before reading the first descriptor from FW, those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data there
	 * will always be up-to-date.
	 */
	dynamic_loader = &hdev->fw_loader.dynamic_loader;
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
}

static int gaudi2_init_cpu(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 4051 4052 err = hl_poll_timeout( 4053 hdev, 4054 mmCPU_IF_QUEUE_INIT, 4055 status, 4056 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4057 1000, 4058 cpu_timeout); 4059 4060 if (err) { 4061 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n"); 4062 return -EIO; 4063 } 4064 4065 /* update FW application security bits */ 4066 if (prop->fw_cpu_boot_dev_sts0_valid) 4067 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4068 4069 if (prop->fw_cpu_boot_dev_sts1_valid) 4070 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4071 4072 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4073 return 0; 4074 } 4075 4076 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4077 u32 queue_id_base) 4078 { 4079 struct hl_hw_queue *q; 4080 u32 pq_id, pq_offset; 4081 4082 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4083 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4084 pq_offset = pq_id * 4; 4085 4086 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4087 lower_32_bits(q->bus_address)); 4088 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4089 upper_32_bits(q->bus_address)); 4090 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4091 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4092 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4093 } 4094 } 4095 4096 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4097 { 4098 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4099 4100 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4101 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4102 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4103 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4104 4105 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4106 cp_offset = cp_id * 4; 4107 4108 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4109 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4110 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4111 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4112 } 4113 4114 /* allow QMANs to accept work from ARC CQF */ 4115 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4116 } 4117 4118 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4119 u32 queue_id_base) 4120 { 4121 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4122 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4123 4124 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4125 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4126 4127 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4128 pq_offset = pq_id * 4; 4129 4130 /* Configure QMAN HBW to scratchpad as it is not needed */ 4131 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4132 lower_32_bits(gaudi2->scratchpad_bus_address)); 4133 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4134 upper_32_bits(gaudi2->scratchpad_bus_address)); 4135 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4136 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4137 4138 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4139 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4140 
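		/* Descriptive note (added): the PQC completion LBW write of
		 * QM_PQC_LBW_WDATA is routed to the DCORE0 sync manager SOB objects.
		 */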
WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4141 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4142 } 4143 4144 /* Enable QMAN H/W completion */ 4145 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 4146 } 4147 4148 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4149 { 4150 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4151 u32 sp_reg_addr; 4152 4153 switch (queue_id_base) { 4154 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4155 fallthrough; 4156 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4157 fallthrough; 4158 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4159 fallthrough; 4160 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4161 fallthrough; 4162 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4163 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4164 break; 4165 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4166 fallthrough; 4167 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4168 fallthrough; 4169 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4170 fallthrough; 4171 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4172 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4173 break; 4174 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4175 fallthrough; 4176 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4177 fallthrough; 4178 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4179 fallthrough; 4180 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4181 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4182 break; 4183 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4184 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4185 break; 4186 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4187 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4188 break; 4189 default: 4190 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4191 return 0; 4192 } 4193 4194 return sp_reg_addr; 4195 } 4196 4197 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4198 u32 queue_id_base) 4199 { 4200 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4201 int map_table_entry; 4202 4203 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4204 4205 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4206 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4207 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4208 4209 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4210 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4211 gaudi2_irq_map_table[map_table_entry].cpu_id); 4212 4213 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4214 4215 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4216 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4217 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4218 4219 /* Enable the QMAN channel. 4220 * PDMA QMAN configuration is different, as we do not allow user to 4221 * access some of the CPs. 4222 * PDMA0: CP2/3 are reserved for the ARC usage. 4223 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
4224 */ 4225 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4226 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4227 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4228 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4229 else 4230 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4231 } 4232 4233 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4234 u32 queue_id_base) 4235 { 4236 u32 pq_id; 4237 4238 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4239 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4240 4241 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4242 gaudi2_init_qman_cp(hdev, reg_base); 4243 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4244 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4245 } 4246 4247 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 4248 u32 dma_core_id, bool is_secure) 4249 { 4250 u32 prot, irq_handler_offset; 4251 struct cpu_dyn_regs *dyn_regs; 4252 int map_table_entry; 4253 4254 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 4255 if (is_secure) 4256 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 4257 4258 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 4259 4260 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4261 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 4262 4263 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 4264 lower_32_bits(CFG_BASE + irq_handler_offset)); 4265 4266 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 4267 upper_32_bits(CFG_BASE + irq_handler_offset)); 4268 4269 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 4270 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 4271 gaudi2_irq_map_table[map_table_entry].cpu_id); 4272 4273 /* Enable the DMA channel */ 4274 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 4275 } 4276 4277 static void gaudi2_init_kdma(struct hl_device *hdev) 4278 { 4279 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4280 u32 reg_base; 4281 4282 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 4283 return; 4284 4285 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 4286 4287 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 4288 4289 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 4290 } 4291 4292 static void gaudi2_init_pdma(struct hl_device *hdev) 4293 { 4294 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4295 u32 reg_base; 4296 4297 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 4298 return; 4299 4300 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 4301 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 4302 4303 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 4304 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 4305 4306 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 4307 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 4308 4309 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 4310 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 4311 4312 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 4313 } 4314 4315 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 4316 { 4317 u32 reg_base, base_edma_core_id, base_edma_qman_id; 4318 4319 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 4320 base_edma_qman_id = edma_stream_base[seq]; 4321 4322 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 4323 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 4324 4325 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 4326 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 4327 } 4328 4329 static void gaudi2_init_edma(struct hl_device *hdev) 4330 { 4331 struct asic_fixed_properties *prop = &hdev->asic_prop; 4332 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4333 int dcore, inst; 4334 4335 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 4336 return; 4337 4338 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 4339 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 4340 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 4341 4342 if (!(prop->edma_enabled_mask & BIT(seq))) 4343 continue; 4344 4345 gaudi2_init_edma_instance(hdev, seq); 4346 4347 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 4348 } 4349 } 4350 } 4351 4352 /* 4353 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 4354 * @hdev: pointer to habanalabs device structure. 4355 * @sob_id: sync object ID. 4356 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 4357 * @interrupt_id: interrupt ID. 4358 * 4359 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 4360 * write directly to the HBW host memory of the virtual MSI-X doorbell. 4361 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 4362 * 4363 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 4364 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 4365 * completion, by decrementing the sync object value and re-arming the monitor. 4366 */ 4367 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 4368 u32 first_mon_id, u32 interrupt_id) 4369 { 4370 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 4371 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4372 u64 addr; 4373 u8 mask; 4374 4375 /* Reset the SOB value */ 4376 sob_offset = sob_id * sizeof(u32); 4377 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 4378 4379 /* Configure 3 monitors: 4380 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 4381 * 2. Decrement SOB value by 1. 4382 * 3. Re-arm the master monitor. 
4383 */ 4384 4385 first_mon_offset = first_mon_id * sizeof(u32); 4386 4387 /* 2nd monitor: Decrement SOB value by 1 */ 4388 mon_offset = first_mon_offset + sizeof(u32); 4389 4390 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 4391 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4392 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4393 4394 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 4395 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 4396 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 4397 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4398 4399 /* 3rd monitor: Re-arm the master monitor */ 4400 mon_offset = first_mon_offset + 2 * sizeof(u32); 4401 4402 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 4403 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4404 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4405 4406 sob_group = sob_id / 8; 4407 mask = ~BIT(sob_id & 0x7); 4408 mode = 0; /* comparison mode is "greater than or equal to" */ 4409 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 4410 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 4411 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 4412 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 4413 4414 payload = arm; 4415 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4416 4417 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 4418 mon_offset = first_mon_offset; 4419 4420 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 4421 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 4422 4423 addr = gaudi2->virt_msix_db_dma_addr; 4424 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4425 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4426 4427 payload = interrupt_id; 4428 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4429 4430 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 4431 } 4432 4433 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 4434 { 4435 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 4436 struct asic_fixed_properties *prop = &hdev->asic_prop; 4437 4438 /* Decoder normal/abnormal interrupts */ 4439 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 4440 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 4441 continue; 4442 4443 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4444 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 4445 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 4446 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4447 4448 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4449 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 4450 interrupt_id += 1; 4451 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4452 } 4453 } 4454 4455 static void gaudi2_init_sm(struct hl_device *hdev) 4456 { 4457 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4458 u64 cq_address; 4459 u32 reg_val; 4460 int i; 4461 4462 /* Enable HBW/LBW CQ for completion monitors */ 4463 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4464 
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 4465 4466 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 4467 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4468 4469 /* Enable only HBW CQ for KDMA completion monitor */ 4470 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4471 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4472 4473 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 4474 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 4475 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 4476 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 4477 4478 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 4479 cq_address = 4480 hdev->completion_queue[i].bus_address; 4481 4482 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 4483 lower_32_bits(cq_address)); 4484 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 4485 upper_32_bits(cq_address)); 4486 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 4487 ilog2(HL_CQ_SIZE_IN_BYTES)); 4488 } 4489 4490 /* Configure kernel ASID and MMU BP*/ 4491 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 4492 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 4493 4494 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 4495 gaudi2_prepare_sm_for_virt_msix_db(hdev); 4496 } 4497 4498 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 4499 { 4500 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4501 u32 reg_val; 4502 int i; 4503 4504 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 4505 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 4506 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 4507 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 4508 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 4509 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 4510 4511 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 4512 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 4513 4514 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 4515 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 4516 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 4517 } 4518 } 4519 4520 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 4521 bool config_qman_only) 4522 { 4523 u32 queue_id_base, reg_base; 4524 4525 switch (dcore_id) { 4526 case 0: 4527 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 4528 break; 4529 case 1: 4530 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 4531 break; 4532 case 2: 4533 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 4534 break; 4535 case 3: 4536 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 4537 break; 4538 default: 4539 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 4540 return; 4541 } 4542 4543 if (!config_qman_only) { 4544 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 4545 gaudi2_init_mme_acc(hdev, reg_base); 4546 } 4547 4548 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 4549 gaudi2_init_qman(hdev, reg_base, queue_id_base); 4550 } 4551 4552 static void gaudi2_init_mme(struct hl_device *hdev) 4553 { 4554 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4555 int i; 4556 4557 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 4558 return; 4559 4560 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 4561 gaudi2_init_dcore_mme(hdev, i, false); 4562 4563 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 4564 } 4565 } 4566 4567 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 4568 { 4569 /* Mask arithmetic and QM interrupts in TPC */ 4570 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 4571 4572 /* Set 16 cache lines */ 4573 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 4574 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 4575 } 4576 4577 struct gaudi2_tpc_init_cfg_data { 4578 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 4579 }; 4580 4581 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 4582 u32 offset, struct iterate_module_ctx *ctx) 4583 { 4584 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4585 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 4586 u32 queue_id_base; 4587 u8 seq; 4588 4589 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 4590 4591 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 4592 /* gets last sequence number */ 4593 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 4594 else 4595 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 4596 4597 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 4598 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 4599 4600 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 4601 } 4602 4603 static void gaudi2_init_tpc(struct hl_device *hdev) 4604 { 4605 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4606 struct gaudi2_tpc_init_cfg_data init_cfg_data; 4607 struct iterate_module_ctx tpc_iter; 4608 4609 if (!hdev->asic_prop.tpc_enabled_mask) 4610 return; 4611 4612 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 4613 return; 4614 4615 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 4616 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 4617 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 4618 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 4619 tpc_iter.fn = &gaudi2_init_tpc_config; 4620 tpc_iter.data = &init_cfg_data; 4621 gaudi2_iterate_tpcs(hdev, &tpc_iter); 4622 } 4623 4624 static void gaudi2_init_rotator(struct hl_device *hdev) 4625 { 4626 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4627 u32 i, reg_base, queue_id; 4628 4629 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 4630 4631 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4632 reg_base = gaudi2_qm_blocks_bases[queue_id]; 4633 gaudi2_init_qman(hdev, reg_base, queue_id); 4634 4635 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 4636 } 4637 } 4638 4639 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 4640 { 4641 u32 sob_id; 4642 4643 /* VCMD normal interrupt */ 4644 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4645 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 4646 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4647 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4648 4649 /* VCMD abnormal interrupt */ 4650 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4651 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 4652 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4653 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4654 } 4655 4656 static void gaudi2_init_dec(struct hl_device *hdev) 4657 
{ 4658 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4659 u32 dcore_id, dec_id, dec_bit; 4660 u64 base_addr; 4661 4662 if (!hdev->asic_prop.decoder_enabled_mask) 4663 return; 4664 4665 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 4666 return; 4667 4668 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4669 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4670 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4671 4672 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4673 continue; 4674 4675 base_addr = mmDCORE0_DEC0_CMD_BASE + 4676 BRDG_CTRL_BLOCK_OFFSET + 4677 dcore_id * DCORE_OFFSET + 4678 dec_id * DCORE_VDEC_OFFSET; 4679 4680 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4681 4682 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4683 } 4684 4685 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 4686 dec_bit = PCIE_DEC_SHIFT + dec_id; 4687 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4688 continue; 4689 4690 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 4691 dec_id * DCORE_VDEC_OFFSET; 4692 4693 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4694 4695 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4696 } 4697 } 4698 4699 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 4700 u32 stlb_base, u32 asid, u64 phys_addr) 4701 { 4702 u32 status, timeout_usec; 4703 int rc; 4704 4705 if (hdev->pldm || !hdev->pdev) 4706 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 4707 else 4708 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 4709 4710 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 4711 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 4712 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 4713 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 4714 4715 rc = hl_poll_timeout( 4716 hdev, 4717 stlb_base + STLB_BUSY_OFFSET, 4718 status, 4719 !(status & 0x80000000), 4720 1000, 4721 timeout_usec); 4722 4723 if (rc) { 4724 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 4725 return rc; 4726 } 4727 4728 return 0; 4729 } 4730 4731 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 4732 u32 start_offset, u32 inv_start_val, 4733 u32 flags) 4734 { 4735 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 4736 if (flags & MMU_OP_CLEAR_MEMCACHE) 4737 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 4738 4739 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 4740 return; 4741 4742 WREG32(stlb_base + start_offset, inv_start_val); 4743 } 4744 4745 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 4746 struct gaudi2_cache_invld_params *inv_params) 4747 { 4748 u32 status, timeout_usec, start_offset; 4749 int rc; 4750 4751 timeout_usec = (hdev->pldm) ? 
						GAUDI2_PLDM_MMU_TIMEOUT_USEC :
						GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* Poll the PMMU mem line cache (only needed in MMU range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* The lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate it only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}

bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* This function shall be called only for HMMUs whose capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}

static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the addresses range.
		 * Note: by design, the start address we program into the register is
		 * not included in the invalidation range. Therefore we program an
		 * address lower than the first address we actually want invalidated.
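		 * For example, if start_va is 0x1000, the value programmed below is
		 * derived from 0xFFF so that 0x1000 itself is covered.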
4824 */ 4825 u64 start = inv_params->start_va - 1; 4826 4827 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 4828 4829 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 4830 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 4831 4832 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 4833 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 4834 4835 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 4836 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 4837 4838 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 4839 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 4840 } else { 4841 start_offset = STLB_INV_ALL_START_OFFSET; 4842 } 4843 4844 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 4845 inv_params->inv_start_val, inv_params->flags); 4846 } 4847 4848 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 4849 int dcore_id, int hmmu_id, 4850 struct gaudi2_cache_invld_params *inv_params) 4851 { 4852 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4853 4854 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 4855 } 4856 4857 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 4858 int dcore_id, int hmmu_id, 4859 struct gaudi2_cache_invld_params *inv_params) 4860 { 4861 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4862 4863 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 4864 } 4865 4866 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 4867 struct gaudi2_cache_invld_params *inv_params) 4868 { 4869 int dcore_id, hmmu_id; 4870 4871 /* first send all invalidation commands */ 4872 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4873 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4874 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4875 continue; 4876 4877 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 4878 } 4879 } 4880 4881 /* next, poll all invalidations status */ 4882 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4883 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4884 int rc; 4885 4886 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4887 continue; 4888 4889 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 4890 inv_params); 4891 if (rc) 4892 return rc; 4893 } 4894 } 4895 4896 return 0; 4897 } 4898 4899 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 4900 { 4901 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4902 struct gaudi2_cache_invld_params invld_params; 4903 int rc = 0; 4904 4905 if (hdev->reset_info.hard_reset_pending) 4906 return rc; 4907 4908 invld_params.range_invalidation = false; 4909 invld_params.inv_start_val = 1; 4910 4911 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4912 invld_params.flags = flags; 4913 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4914 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4915 &invld_params); 4916 } else if (flags & MMU_OP_PHYS_PACK) { 4917 invld_params.flags = 0; 4918 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4919 } 4920 4921 return rc; 4922 } 4923 4924 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 4925 u32 flags, u32 asid, u64 va, u64 size) 4926 { 4927 struct gaudi2_cache_invld_params invld_params = {0}; 4928 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4929 u64 start_va, end_va; 4930 u32 
inv_start_val; 4931 int rc = 0; 4932 4933 if (hdev->reset_info.hard_reset_pending) 4934 return 0; 4935 4936 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 4937 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 4938 asid << MMU_RANGE_INV_ASID_SHIFT); 4939 start_va = va; 4940 end_va = start_va + size; 4941 4942 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4943 /* As range invalidation does not support zero address we will 4944 * do full invalidation in this case 4945 */ 4946 if (start_va) { 4947 invld_params.range_invalidation = true; 4948 invld_params.start_va = start_va; 4949 invld_params.end_va = end_va; 4950 invld_params.inv_start_val = inv_start_val; 4951 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 4952 } else { 4953 invld_params.range_invalidation = false; 4954 invld_params.inv_start_val = 1; 4955 invld_params.flags = flags; 4956 } 4957 4958 4959 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4960 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4961 &invld_params); 4962 if (rc) 4963 return rc; 4964 4965 } else if (flags & MMU_OP_PHYS_PACK) { 4966 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 4967 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 4968 invld_params.inv_start_val = inv_start_val; 4969 invld_params.flags = flags; 4970 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4971 } 4972 4973 return rc; 4974 } 4975 4976 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 4977 { 4978 struct asic_fixed_properties *prop = &hdev->asic_prop; 4979 u64 hop0_addr; 4980 u32 asid, max_asid = prop->max_asid; 4981 int rc; 4982 4983 /* it takes too much time to init all of the ASIDs on palladium */ 4984 if (hdev->pldm) 4985 max_asid = min((u32) 8, max_asid); 4986 4987 for (asid = 0 ; asid < max_asid ; asid++) { 4988 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 4989 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 4990 if (rc) { 4991 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 4992 return rc; 4993 } 4994 } 4995 4996 return 0; 4997 } 4998 4999 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5000 { 5001 u32 status, timeout_usec; 5002 int rc; 5003 5004 if (hdev->pldm || !hdev->pdev) 5005 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5006 else 5007 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5008 5009 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5010 5011 rc = hl_poll_timeout( 5012 hdev, 5013 stlb_base + STLB_SRAM_INIT_OFFSET, 5014 status, 5015 !status, 5016 1000, 5017 timeout_usec); 5018 5019 if (rc) 5020 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5021 5022 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5023 if (rc) 5024 return rc; 5025 5026 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5027 5028 rc = hl_poll_timeout( 5029 hdev, 5030 stlb_base + STLB_INV_ALL_START_OFFSET, 5031 status, 5032 !status, 5033 1000, 5034 timeout_usec); 5035 5036 if (rc) 5037 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5038 5039 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5040 5041 return rc; 5042 } 5043 5044 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5045 { 5046 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5047 u32 mmu_base, stlb_base; 5048 int rc; 5049 5050 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5051 return 0; 5052 5053 mmu_base = mmPMMU_HBW_MMU_BASE; 5054 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5055 5056 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5057 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5058 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5059 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5060 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5061 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5062 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5063 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5064 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5065 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5066 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5067 5068 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5069 5070 if (PAGE_SIZE == SZ_64K) { 5071 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5072 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5073 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5074 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5075 FIELD_PREP( 5076 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5077 1), 5078 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5079 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5080 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5081 } 5082 5083 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5084 5085 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5086 if (rc) 5087 return rc; 5088 5089 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5090 5091 return 0; 5092 } 5093 5094 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5095 int hmmu_id) 5096 { 5097 struct asic_fixed_properties *prop = &hdev->asic_prop; 5098 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5099 u32 offset, mmu_base, stlb_base, hw_cap; 5100 u8 dmmu_seq; 5101 int rc; 5102 5103 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5104 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5105 5106 /* 5107 * return if DMMU is already initialized or if it's not out of 5108 * isolation (due to cluster binning) 5109 */ 5110 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5111 return 0; 5112 5113 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5114 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5115 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5116 5117 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5118 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5119 5120 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5121 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5122 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5123 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5124 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5125 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5126 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5127 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5128 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5129 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5130 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5131 5132 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5133 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5134 5135 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5136 5137 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5138 if (rc) 5139 return rc; 5140 5141 gaudi2->hw_cap_initialized |= hw_cap; 5142 5143 return 0; 5144 } 5145 5146 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5147 { 5148 int rc, dcore_id, hmmu_id; 5149 5150 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5151 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5152 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5153 if (rc) 5154 return rc; 5155 } 5156 5157 return 0; 5158 } 5159 5160 static int gaudi2_mmu_init(struct hl_device *hdev) 5161 { 5162 int rc; 5163 5164 rc = gaudi2_pci_mmu_init(hdev); 5165 if (rc) 5166 return rc; 5167 5168 rc = gaudi2_hbm_mmu_init(hdev); 5169 if (rc) 5170 return rc; 5171 5172 return 0; 5173 } 5174 5175 static int gaudi2_hw_init(struct hl_device *hdev) 5176 { 5177 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5178 int rc; 5179 5180 /* Let's mark in the H/W that we have reached this point. We check 5181 * this value in the reset_before_init function to understand whether 5182 * we need to reset the chip before doing H/W init. This register is 5183 * cleared by the H/W upon H/W reset 5184 */ 5185 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5186 5187 /* Perform read from the device to make sure device is up */ 5188 RREG32(mmHW_STATE); 5189 5190 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5191 * So we set it here and if anyone tries to move it later to 5192 * a different address, there will be an error 5193 */ 5194 if (hdev->asic_prop.iatu_done_by_fw) 5195 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5196 5197 /* 5198 * Before pushing u-boot/linux to device, need to set the hbm bar to 5199 * base address of dram 5200 */ 5201 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5202 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5203 return -EIO; 5204 } 5205 5206 rc = gaudi2_init_cpu(hdev); 5207 if (rc) { 5208 dev_err(hdev->dev, "failed to initialize CPU\n"); 5209 return rc; 5210 } 5211 5212 gaudi2_init_scrambler_hbm(hdev); 5213 gaudi2_init_kdma(hdev); 5214 5215 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5216 if (rc) { 5217 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5218 return rc; 5219 } 5220 5221 rc = gaudi2->cpucp_info_get(hdev); 5222 if (rc) { 5223 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5224 return rc; 5225 } 5226 5227 rc = gaudi2_mmu_init(hdev); 5228 if (rc) 5229 return rc; 5230 5231 gaudi2_init_pdma(hdev); 5232 gaudi2_init_edma(hdev); 5233 gaudi2_init_sm(hdev); 5234 gaudi2_init_tpc(hdev); 5235 gaudi2_init_mme(hdev); 5236 gaudi2_init_rotator(hdev); 5237 gaudi2_init_dec(hdev); 5238 gaudi2_enable_timestamp(hdev); 5239 5240 rc = gaudi2_coresight_init(hdev); 5241 if (rc) 5242 goto disable_queues; 5243 5244 rc = gaudi2_enable_msix(hdev); 5245 if (rc) 5246 goto disable_queues; 5247 5248 /* Perform read from the device to flush all configuration */ 5249 RREG32(mmHW_STATE); 5250 5251 return 0; 5252 5253 disable_queues: 5254 gaudi2_disable_dma_qmans(hdev); 5255 gaudi2_disable_mme_qmans(hdev); 5256 gaudi2_disable_tpc_qmans(hdev); 5257 gaudi2_disable_rot_qmans(hdev); 5258 gaudi2_disable_nic_qmans(hdev); 5259 5260 gaudi2_disable_timestamp(hdev); 5261 5262 return rc; 5263 } 5264 5265 /** 5266 * gaudi2_send_hard_reset_cmd - common function to handle reset 5267 * 5268 * 
 @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by the driver or by the FW, and
 * which FW components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case where the failure occurred while loading the CPU
	 * management app, but the driver did not detect any failure while loading
	 * the FW. In such a scenario the driver will send only HALT_MACHINE, and
	 * no one will respond to this request since the FW is already back in
	 * preboot and cannot handle this command.
	 * In this case, the next time the management app loads it will check the
	 * events register, which will still hold the halt indication, and will
	 * reboot the device.
	 * The solution is to let preboot clear all relevant registers before the
	 * next boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/Bootfit exists, this write to the SP can be interpreted in two ways:
	 * 1. FW reset: the FW initiates the reset sequence
	 * 2. driver reset: the FW will start the HALT sequence (the preparations for
	 *                  the reset, but not the reset itself, as it is not
	 *                  implemented on their part) and the LKD will wait to let
	 *                  the FW complete the sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot only (without Linux/Boot fit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit, this is a
	 * hail-mary attempt to revive the card in the small chance that the f/w
	 * has experienced a watchdog event, which caused it to return back to
	 * preboot. In that case, triggering the reset through the GIC won't help.
	 * We need to trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was a heartbeat failure, because that
	 * would be the indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got a heartbeat
	 * failure, this won't do any damage.
5330 */ 5331 5332 if (heartbeat_reset || preboot_only || !cpu_initialized) { 5333 if (hdev->asic_prop.hard_reset_done_by_fw) 5334 hl_fw_ask_hard_reset_without_linux(hdev); 5335 else 5336 hl_fw_ask_halt_machine_without_linux(hdev); 5337 } 5338 } 5339 5340 /** 5341 * gaudi2_execute_hard_reset - execute hard reset by driver/FW 5342 * 5343 * @hdev: pointer to the habanalabs device structure 5344 * @reset_sleep_ms: sleep time in msec after reset 5345 * 5346 * This function executes hard reset based on if driver/FW should do the reset 5347 */ 5348 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms) 5349 { 5350 if (hdev->asic_prop.hard_reset_done_by_fw) { 5351 gaudi2_send_hard_reset_cmd(hdev); 5352 return; 5353 } 5354 5355 /* Set device to handle FLR by H/W as we will put the device 5356 * CPU to halt mode 5357 */ 5358 WREG32(mmPCIE_AUX_FLR_CTRL, 5359 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 5360 5361 gaudi2_send_hard_reset_cmd(hdev); 5362 5363 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); 5364 } 5365 5366 /** 5367 * gaudi2_execute_soft_reset - execute soft reset by driver/FW 5368 * 5369 * @hdev: pointer to the habanalabs device structure 5370 * @reset_sleep_ms: sleep time in msec after reset 5371 * @driver_performs_reset: true if driver should perform reset instead of f/w. 5372 * 5373 * This function executes soft reset based on if driver/FW should do the reset 5374 */ 5375 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms, 5376 bool driver_performs_reset) 5377 { 5378 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5379 5380 if (!driver_performs_reset) { 5381 /* set SP to indicate reset request sent to FW */ 5382 if (dyn_regs->cpu_rst_status) 5383 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 5384 else 5385 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 5386 5387 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 5388 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); 5389 return; 5390 } 5391 5392 /* Block access to engines, QMANs and SM during reset, these 5393 * RRs will be reconfigured after soft reset. 5394 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset. 
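	 * The two long LBW range registers written below cover the spans below
	 * and above the PCIE_MSIX block, leaving that block out of the blocked
	 * region.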
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
					mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
					mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
}

static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
								u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	/* Without this sleep the reset will not work */
	msleep(reset_sleep_ms);

	/* We poll the BTM done indication multiple times after reset due to
	 * HW erratum 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}

static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
			reg_val);
}

static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;

	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
	} else {
		/*
		 * As we also have to support working with preboot only (which does
		 * not support soft reset), we have to make sure that security is
		 * disabled before letting the driver do the reset. The user shall
		 * control the BFE flags to avoid requesting a soft reset on a
		 * secured device that runs preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
						!hdev->asic_prop.fw_security_enabled);
		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
	}

skip_reset:
	if (driver_performs_reset || hard_reset)
		/*
		 * Instead of waiting for the BTM indication we should wait for preboot ready:
		 * Consider the scenario below:
		 * 1. FW update is being triggered
		 *        - setting the dirty bit
		 * 2. hard reset will be triggered due to the dirty bit
		 * 3. FW initiates the reset:
		 *        - dirty bit cleared
		 *        - BTM indication cleared
		 *        - preboot ready indication cleared
		 * 4. during hard reset:
		 *        - BTM indication will be set
		 *        - BIST test performed and another reset triggered
		 * 5.
only after this reset the preboot will set the preboot ready 5501 * 5502 * when polling on BTM indication alone we can lose sync with FW while trying to 5503 * communicate with FW that is during reset. 5504 * to overcome this we will always wait to preboot ready indication 5505 */ 5506 if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { 5507 msleep(reset_sleep_ms); 5508 hl_fw_wait_preboot_ready(hdev); 5509 } else { 5510 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); 5511 } 5512 else 5513 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 5514 5515 if (!gaudi2) 5516 return; 5517 5518 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 5519 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 5520 5521 /* 5522 * Clear NIC capability mask in order for driver to re-configure 5523 * NIC QMANs. NIC ports will not be re-configured during soft 5524 * reset as we call gaudi2_nic_init only during hard reset 5525 */ 5526 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 5527 5528 if (hard_reset) { 5529 gaudi2->hw_cap_initialized &= 5530 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 5531 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 5532 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 5533 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 5534 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 5535 5536 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 5537 } else { 5538 gaudi2->hw_cap_initialized &= 5539 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 5540 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 5541 HW_CAP_ROT_MASK); 5542 } 5543 } 5544 5545 static int gaudi2_suspend(struct hl_device *hdev) 5546 { 5547 int rc; 5548 5549 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 5550 if (rc) 5551 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 5552 5553 return rc; 5554 } 5555 5556 static int gaudi2_resume(struct hl_device *hdev) 5557 { 5558 return gaudi2_init_iatu(hdev); 5559 } 5560 5561 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 5562 void *cpu_addr, dma_addr_t dma_addr, size_t size) 5563 { 5564 int rc; 5565 5566 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 5567 VM_DONTCOPY | VM_NORESERVE; 5568 5569 #ifdef _HAS_DMA_MMAP_COHERENT 5570 5571 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 5572 if (rc) 5573 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 5574 5575 #else 5576 5577 rc = remap_pfn_range(vma, vma->vm_start, 5578 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 5579 size, vma->vm_page_prot); 5580 if (rc) 5581 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 5582 5583 #endif 5584 5585 return rc; 5586 } 5587 5588 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 5589 { 5590 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5591 u64 hw_cap_mask = 0; 5592 u64 hw_tpc_cap_bit = 0; 5593 u64 hw_nic_cap_bit = 0; 5594 u64 hw_test_cap_bit = 0; 5595 5596 switch (hw_queue_id) { 5597 case GAUDI2_QUEUE_ID_PDMA_0_0: 5598 case GAUDI2_QUEUE_ID_PDMA_0_1: 5599 case GAUDI2_QUEUE_ID_PDMA_1_0: 5600 hw_cap_mask = HW_CAP_PDMA_MASK; 5601 break; 5602 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 5603 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 5604 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 5605 break; 5606 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 5607 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 5608 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 
2); 5609 break; 5610 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 5611 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 5612 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2); 5613 break; 5614 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 5615 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 5616 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 5617 break; 5618 5619 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 5620 hw_test_cap_bit = HW_CAP_MME_SHIFT; 5621 break; 5622 5623 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 5624 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 5625 break; 5626 5627 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 5628 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 5629 break; 5630 5631 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 5632 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 5633 break; 5634 5635 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 5636 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 5637 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 5638 5639 /* special case where cap bit refers to the first queue id */ 5640 if (!hw_tpc_cap_bit) 5641 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 5642 break; 5643 5644 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 5645 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 5646 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 5647 break; 5648 5649 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 5650 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 5651 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 5652 break; 5653 5654 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 5655 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 5656 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 5657 break; 5658 5659 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 5660 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 5661 break; 5662 5663 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 5664 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 5665 break; 5666 5667 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 5668 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 5669 5670 /* special case where cap bit refers to the first queue id */ 5671 if (!hw_nic_cap_bit) 5672 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 5673 break; 5674 5675 case GAUDI2_QUEUE_ID_CPU_PQ: 5676 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 5677 5678 default: 5679 return false; 5680 } 5681 5682 if (hw_tpc_cap_bit) 5683 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 5684 5685 if (hw_nic_cap_bit) 5686 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 5687 5688 if (hw_test_cap_bit) 5689 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 5690 5691 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 5692 } 5693 5694 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 5695 { 5696 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5697 5698 switch (arc_id) { 5699 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 5700 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5701 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 5702 5703 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5704 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5705 5706 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5707 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5708 5709 default: 5710 return false; 5711 } 5712 } 5713 5714 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5715 { 5716 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5717 5718 switch (arc_id) { 5719 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5720 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5721 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 5722 break; 5723 5724 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5725 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5726 break; 5727 5728 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5729 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5730 break; 5731 5732 default: 5733 return; 5734 } 5735 } 5736 5737 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5738 { 5739 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5740 5741 switch (arc_id) { 5742 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5743 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5744 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 5745 break; 5746 5747 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5748 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 5749 break; 5750 5751 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5752 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 5753 break; 5754 5755 default: 5756 return; 5757 } 5758 } 5759 5760 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 5761 { 5762 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5763 u32 pq_offset, reg_base, db_reg_offset, db_value; 5764 5765 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 5766 /* 5767 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 5768 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 5769 * number. 
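		 * For example, a queue ID whose two LSBs are 0x1 uses PQ_PI_1,
		 * i.e. a byte offset of 4 from QM_PQ_PI_0_OFFSET.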
5770 */ 5771 pq_offset = (hw_queue_id & 0x3) * 4; 5772 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 5773 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 5774 } else { 5775 db_reg_offset = mmCPU_IF_PF_PQ_PI; 5776 } 5777 5778 db_value = pi; 5779 5780 /* ring the doorbell */ 5781 WREG32(db_reg_offset, db_value); 5782 5783 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 5784 /* make sure device CPU will read latest data from host */ 5785 mb(); 5786 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 5787 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 5788 } 5789 } 5790 5791 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 5792 { 5793 __le64 *pbd = (__le64 *) bd; 5794 5795 /* The QMANs are on the host memory so a simple copy suffice */ 5796 pqe[0] = pbd[0]; 5797 pqe[1] = pbd[1]; 5798 } 5799 5800 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 5801 dma_addr_t *dma_handle, gfp_t flags) 5802 { 5803 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 5804 } 5805 5806 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 5807 void *cpu_addr, dma_addr_t dma_handle) 5808 { 5809 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 5810 } 5811 5812 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 5813 u32 timeout, u64 *result) 5814 { 5815 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5816 5817 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 5818 if (result) 5819 *result = 0; 5820 return 0; 5821 } 5822 5823 if (!timeout) 5824 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 5825 5826 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 5827 } 5828 5829 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 5830 gfp_t mem_flags, dma_addr_t *dma_handle) 5831 { 5832 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 5833 return NULL; 5834 5835 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 5836 } 5837 5838 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 5839 { 5840 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 5841 } 5842 5843 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 5844 dma_addr_t *dma_handle) 5845 { 5846 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 5847 } 5848 5849 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 5850 { 5851 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 5852 } 5853 5854 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 5855 enum dma_data_direction dir) 5856 { 5857 dma_addr_t dma_addr; 5858 5859 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 5860 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 5861 return 0; 5862 5863 return dma_addr; 5864 } 5865 5866 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 5867 enum dma_data_direction dir) 5868 { 5869 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 5870 } 5871 5872 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 5873 { 5874 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5875 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5876 5877 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 5878 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5879 return -EINVAL; 5880 } 5881 5882 
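	/*
	 * A CB for an internal QMAN must reside entirely inside one of the
	 * regions checked below: SRAM, DRAM, the driver-managed DMMU/PMMU VA
	 * ranges, or (when the device is not behind an IOMMU) a valid host
	 * physical address.
	 */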
/* Just check if CB address is valid */ 5883 5884 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5885 parser->user_cb_size, 5886 asic_prop->sram_user_base_address, 5887 asic_prop->sram_end_address)) 5888 return 0; 5889 5890 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5891 parser->user_cb_size, 5892 asic_prop->dram_user_base_address, 5893 asic_prop->dram_end_address)) 5894 return 0; 5895 5896 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 5897 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5898 parser->user_cb_size, 5899 asic_prop->dmmu.start_addr, 5900 asic_prop->dmmu.end_addr)) 5901 return 0; 5902 5903 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 5904 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5905 parser->user_cb_size, 5906 asic_prop->pmmu.start_addr, 5907 asic_prop->pmmu.end_addr) || 5908 hl_mem_area_inside_range( 5909 (u64) (uintptr_t) parser->user_cb, 5910 parser->user_cb_size, 5911 asic_prop->pmmu_huge.start_addr, 5912 asic_prop->pmmu_huge.end_addr)) 5913 return 0; 5914 5915 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 5916 if (!hdev->pdev) 5917 return 0; 5918 5919 if (!device_iommu_mapped(&hdev->pdev->dev)) 5920 return 0; 5921 } 5922 5923 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 5924 parser->user_cb, parser->user_cb_size); 5925 5926 return -EFAULT; 5927 } 5928 5929 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5930 { 5931 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5932 5933 if (!parser->is_kernel_allocated_cb) 5934 return gaudi2_validate_cb_address(hdev, parser); 5935 5936 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5937 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 5938 return -EINVAL; 5939 } 5940 5941 return 0; 5942 } 5943 5944 static int gaudi2_send_heartbeat(struct hl_device *hdev) 5945 { 5946 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5947 5948 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 5949 return 0; 5950 5951 return hl_fw_send_heartbeat(hdev); 5952 } 5953 5954 /* This is an internal helper function, used to update the KDMA mmu props. 5955 * Should be called with a proper kdma lock. 
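 * The same ASID and MMU-bypass values are applied to both the read and the
 * write AXUSER attributes of the KDMA context.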
5956 */ 5957 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 5958 bool mmu_bypass, u32 asid) 5959 { 5960 u32 rw_asid, rw_mmu_bp; 5961 5962 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 5963 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 5964 5965 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 5966 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 5967 5968 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 5969 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 5970 } 5971 5972 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 5973 u32 mon_payload, u32 sync_value) 5974 { 5975 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 5976 u8 mask; 5977 5978 sob_offset = sob_id * 4; 5979 mon_offset = mon_id * 4; 5980 5981 /* Reset the SOB value */ 5982 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 5983 5984 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 5985 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 5986 5987 /* Configure this address with CS index because CQ_EN is set */ 5988 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 5989 5990 sync_group_id = sob_id / 8; 5991 mask = ~(1 << (sob_id & 0x7)); 5992 mode = 1; /* comparison mode is "equal to" */ 5993 5994 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 5995 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 5996 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 5997 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 5998 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 5999 } 6000 6001 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6002 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6003 u64 src_addr, u64 dst_addr, 6004 u32 size, bool is_memset) 6005 { 6006 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6007 struct hl_cq_entry *cq_base; 6008 struct hl_cq *cq; 6009 u64 comp_addr; 6010 int rc; 6011 6012 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6013 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6014 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6015 6016 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6017 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6018 6019 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6020 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6021 6022 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6023 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6024 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6025 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6026 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6027 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6028 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6029 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6030 6031 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6032 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6033 6034 if (is_memset) 6035 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6036 6037 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6038 6039 /* Wait for completion */ 6040 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6041 cq_base = 
cq->kernel_address; 6042 polling_addr = (u32 *)&cq_base[cq->ci]; 6043 6044 if (hdev->pldm) 6045 /* for each 1MB 20 second of timeout */ 6046 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6047 else 6048 timeout = KDMA_TIMEOUT_USEC; 6049 6050 /* Polling */ 6051 rc = hl_poll_timeout_memory( 6052 hdev, 6053 polling_addr, 6054 status, 6055 (status == 1), 6056 1000, 6057 timeout, 6058 true); 6059 6060 *polling_addr = 0; 6061 6062 if (rc) { 6063 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6064 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6065 return rc; 6066 } 6067 6068 cq->ci = hl_cq_inc_ptr(cq->ci); 6069 6070 return 0; 6071 } 6072 6073 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6074 { 6075 u32 i; 6076 6077 for (i = 0 ; i < size ; i += sizeof(u32)) 6078 WREG32(addr + i, val); 6079 } 6080 6081 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6082 { 6083 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6084 6085 if (enable) { 6086 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6087 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6088 } else { 6089 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6090 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6091 } 6092 } 6093 6094 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) 6095 { 6096 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 6097 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6098 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; 6099 struct packet_msg_short *msg_short_pkt; 6100 dma_addr_t pkt_dma_addr; 6101 size_t pkt_size; 6102 int rc; 6103 6104 if (hdev->pldm) 6105 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6106 else 6107 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6108 6109 pkt_size = sizeof(*msg_short_pkt); 6110 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); 6111 if (!msg_short_pkt) { 6112 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", 6113 hw_queue_id); 6114 return -ENOMEM; 6115 } 6116 6117 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6118 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6119 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6120 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6121 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6122 6123 msg_short_pkt->value = cpu_to_le32(sob_val); 6124 msg_short_pkt->ctl = cpu_to_le32(tmp); 6125 6126 /* Reset the SOB value */ 6127 WREG32(sob_addr, 0); 6128 6129 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 6130 if (rc) { 6131 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", 6132 hw_queue_id); 6133 goto free_pkt; 6134 } 6135 6136 rc = hl_poll_timeout( 6137 hdev, 6138 sob_addr, 6139 tmp, 6140 (tmp == sob_val), 6141 1000, 6142 timeout_usec); 6143 6144 if (rc == -ETIMEDOUT) { 6145 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 6146 hw_queue_id, tmp); 6147 rc = -EIO; 6148 } 6149 6150 /* Reset the SOB value */ 6151 WREG32(sob_addr, 0); 6152 6153 free_pkt: 6154 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); 6155 return rc; 6156 } 6157 6158 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6159 { 6160 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6161 6162 /* 6163 * check capability here as send_cpu_message() won't update the result 6164 * value if no capability 6165 */ 6166 if 
(!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6167 return 0; 6168 6169 return hl_fw_test_cpu_queue(hdev); 6170 } 6171 6172 static int gaudi2_test_queues(struct hl_device *hdev) 6173 { 6174 int i, rc, ret_val = 0; 6175 6176 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6177 if (!gaudi2_is_queue_enabled(hdev, i)) 6178 continue; 6179 6180 gaudi2_qman_set_test_mode(hdev, i, true); 6181 rc = gaudi2_test_queue(hdev, i); 6182 gaudi2_qman_set_test_mode(hdev, i, false); 6183 6184 if (rc) { 6185 ret_val = -EINVAL; 6186 goto done; 6187 } 6188 } 6189 6190 rc = gaudi2_test_cpu_queue(hdev); 6191 if (rc) { 6192 ret_val = -EINVAL; 6193 goto done; 6194 } 6195 6196 done: 6197 return ret_val; 6198 } 6199 6200 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6201 { 6202 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6203 size_t irq_arr_size; 6204 6205 /* TODO: missing gaudi2_nic_resume. 6206 * Until implemented nic_hw_cap_initialized will remain zeroed 6207 */ 6208 gaudi2_init_arcs(hdev); 6209 gaudi2_scrub_arcs_dccm(hdev); 6210 gaudi2_init_security(hdev); 6211 6212 /* Unmask all IRQs since some could have been received during the soft reset */ 6213 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 6214 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 6215 } 6216 6217 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 6218 struct iterate_module_ctx *ctx) 6219 { 6220 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 6221 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 6222 bool is_eng_idle; 6223 int engine_idx; 6224 6225 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 6226 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 6227 else 6228 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 6229 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 6230 6231 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 6232 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 6233 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 6234 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 6235 6236 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6237 IS_TPC_IDLE(tpc_cfg_sts); 6238 *(idle_data->is_idle) &= is_eng_idle; 6239 6240 if (idle_data->mask && !is_eng_idle) 6241 set_bit(engine_idx, idle_data->mask); 6242 6243 if (idle_data->e) 6244 hl_engine_data_sprintf(idle_data->e, 6245 idle_data->tpc_fmt, dcore, inst, 6246 is_eng_idle ? 
"Y" : "N", 6247 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 6248 } 6249 6250 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6251 struct engines_data *e) 6252 { 6253 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, 6254 mme_arch_sts, dec_swreg15, dec_enabled_bit; 6255 struct asic_fixed_properties *prop = &hdev->asic_prop; 6256 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n"; 6257 unsigned long *mask = (unsigned long *) mask_arr; 6258 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n"; 6259 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 6260 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 6261 const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n"; 6262 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 6263 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 6264 bool is_idle = true, is_eng_idle; 6265 u64 offset; 6266 6267 struct gaudi2_tpc_idle_data tpc_idle_data = { 6268 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 6269 .e = e, 6270 .mask = mask, 6271 .is_idle = &is_idle, 6272 }; 6273 struct iterate_module_ctx tpc_iter = { 6274 .fn = &gaudi2_is_tpc_engine_idle, 6275 .data = &tpc_idle_data, 6276 }; 6277 6278 int engine_idx, i, j; 6279 6280 /* EDMA, Two engines per Dcore */ 6281 if (e) 6282 hl_engine_data_sprintf(e, 6283 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6284 "---- ---- ------- ------------ ----------------------\n"); 6285 6286 for (i = 0; i < NUM_OF_DCORES; i++) { 6287 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 6288 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 6289 6290 if (!(prop->edma_enabled_mask & BIT(seq))) 6291 continue; 6292 6293 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 6294 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6295 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 6296 6297 dma_core_idle_ind_mask = 6298 RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset); 6299 6300 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 6301 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 6302 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 6303 6304 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6305 IS_DMA_IDLE(dma_core_idle_ind_mask); 6306 is_idle &= is_eng_idle; 6307 6308 if (mask && !is_eng_idle) 6309 set_bit(engine_idx, mask); 6310 6311 if (e) 6312 hl_engine_data_sprintf(e, edma_fmt, i, j, 6313 is_eng_idle ? "Y" : "N", 6314 qm_glbl_sts0, 6315 dma_core_idle_ind_mask); 6316 } 6317 } 6318 6319 /* PDMA, Two engines in Full chip */ 6320 if (e) 6321 hl_engine_data_sprintf(e, 6322 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6323 "---- ------- ------------ ----------------------\n"); 6324 6325 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6326 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 6327 offset = i * PDMA_OFFSET; 6328 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset); 6329 6330 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 6331 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 6332 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 6333 6334 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6335 IS_DMA_IDLE(dma_core_idle_ind_mask); 6336 is_idle &= is_eng_idle; 6337 6338 if (mask && !is_eng_idle) 6339 set_bit(engine_idx, mask); 6340 6341 if (e) 6342 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? 
"Y" : "N", 6343 qm_glbl_sts0, dma_core_idle_ind_mask); 6344 } 6345 6346 /* NIC, twelve macros in Full chip */ 6347 if (e && hdev->nic_ports_mask) 6348 hl_engine_data_sprintf(e, 6349 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6350 "--- ------- ------------ ----------\n"); 6351 6352 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6353 if (!(i & 1)) 6354 offset = i / 2 * NIC_OFFSET; 6355 else 6356 offset += NIC_QM_OFFSET; 6357 6358 if (!(hdev->nic_ports_mask & BIT(i))) 6359 continue; 6360 6361 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 6362 6363 6364 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 6365 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 6366 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 6367 6368 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6369 is_idle &= is_eng_idle; 6370 6371 if (mask && !is_eng_idle) 6372 set_bit(engine_idx, mask); 6373 6374 if (e) 6375 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", 6376 qm_glbl_sts0, qm_cgm_sts); 6377 } 6378 6379 if (e) 6380 hl_engine_data_sprintf(e, 6381 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6382 "--- ---- ------- ------------ ---------------\n"); 6383 /* MME, one per Dcore */ 6384 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6385 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 6386 offset = i * DCORE_OFFSET; 6387 6388 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 6389 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 6390 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 6391 6392 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6393 is_idle &= is_eng_idle; 6394 6395 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 6396 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 6397 is_idle &= is_eng_idle; 6398 6399 if (e) 6400 hl_engine_data_sprintf(e, mme_fmt, i, "N", 6401 is_eng_idle ? "Y" : "N", 6402 qm_glbl_sts0, 6403 mme_arch_sts); 6404 6405 if (mask && !is_eng_idle) 6406 set_bit(engine_idx, mask); 6407 } 6408 6409 /* 6410 * TPC 6411 */ 6412 if (e && prop->tpc_enabled_mask) 6413 hl_engine_data_sprintf(e, 6414 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" 6415 "---- --- -------- ------------ ---------- ----------------------\n"); 6416 6417 gaudi2_iterate_tpcs(hdev, &tpc_iter); 6418 6419 /* Decoders, two each Dcore and two shared PCIe decoders */ 6420 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6421 hl_engine_data_sprintf(e, 6422 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 6423 "---- --- ------- ---------------\n"); 6424 6425 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6426 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 6427 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 6428 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 6429 continue; 6430 6431 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 6432 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6433 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 6434 6435 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 6436 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6437 is_idle &= is_eng_idle; 6438 6439 if (mask && !is_eng_idle) 6440 set_bit(engine_idx, mask); 6441 6442 if (e) 6443 hl_engine_data_sprintf(e, dec_fmt, i, j, 6444 is_eng_idle ? 
"Y" : "N", dec_swreg15); 6445 } 6446 } 6447 6448 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6449 hl_engine_data_sprintf(e, 6450 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 6451 "-------- ------- ---------------\n"); 6452 6453 /* Check shared(PCIe) decoders */ 6454 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 6455 dec_enabled_bit = PCIE_DEC_SHIFT + i; 6456 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 6457 continue; 6458 6459 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 6460 offset = i * DCORE_DEC_OFFSET; 6461 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 6462 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6463 is_idle &= is_eng_idle; 6464 6465 if (mask && !is_eng_idle) 6466 set_bit(engine_idx, mask); 6467 6468 if (e) 6469 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 6470 is_eng_idle ? "Y" : "N", dec_swreg15); 6471 } 6472 6473 if (e) 6474 hl_engine_data_sprintf(e, 6475 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 6476 "---- ---- ------- ------------ ---------- -------------\n"); 6477 6478 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6479 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 6480 6481 offset = i * ROT_OFFSET; 6482 6483 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 6484 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 6485 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 6486 6487 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6488 is_idle &= is_eng_idle; 6489 6490 if (mask && !is_eng_idle) 6491 set_bit(engine_idx, mask); 6492 6493 if (e) 6494 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 6495 qm_glbl_sts0, qm_cgm_sts, "-"); 6496 } 6497 6498 return is_idle; 6499 } 6500 6501 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 6502 __acquires(&gaudi2->hw_queues_lock) 6503 { 6504 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6505 6506 spin_lock(&gaudi2->hw_queues_lock); 6507 } 6508 6509 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 6510 __releases(&gaudi2->hw_queues_lock) 6511 { 6512 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6513 6514 spin_unlock(&gaudi2->hw_queues_lock); 6515 } 6516 6517 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 6518 { 6519 return hdev->pdev->device; 6520 } 6521 6522 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 6523 { 6524 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6525 6526 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6527 return 0; 6528 6529 return hl_fw_get_eeprom_data(hdev, data, max_size); 6530 } 6531 6532 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 6533 { 6534 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 6535 } 6536 6537 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 6538 { 6539 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6540 6541 if (aggregate) { 6542 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 6543 return gaudi2->events_stat_aggregate; 6544 } 6545 6546 *size = (u32) sizeof(gaudi2->events_stat); 6547 return gaudi2->events_stat; 6548 } 6549 6550 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 6551 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6552 { 6553 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 6554 dcore_vdec_id + DCORE_OFFSET * dcore_id; 6555 6556 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6557 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6558 6559 
WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6560 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6561 6562 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6563 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6564 6565 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6566 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6567 6568 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6569 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6570 } 6571 6572 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 6573 { 6574 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6575 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6576 struct asic_fixed_properties *prop = &hdev->asic_prop; 6577 u32 dcore_offset = dcore_id * DCORE_OFFSET; 6578 u32 vdec_id, i, ports_offset, reg_val; 6579 u8 edma_seq_base; 6580 6581 /* EDMA */ 6582 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 6583 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 6584 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6585 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6586 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6587 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6588 } 6589 6590 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 6591 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6592 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6593 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6594 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6595 } 6596 6597 /* Sync Mngr */ 6598 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 6599 /* 6600 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 6601 * for any access type 6602 */ 6603 if (dcore_id > 0) { 6604 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 6605 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 6606 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 6607 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 6608 } 6609 6610 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 6611 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 6612 6613 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 6614 ports_offset = i * DCORE_MME_SBTE_OFFSET; 6615 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 6616 dcore_offset + ports_offset, 0); 6617 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 6618 dcore_offset + ports_offset, rw_asid); 6619 } 6620 6621 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 6622 ports_offset = i * DCORE_MME_WB_OFFSET; 6623 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 6624 dcore_offset + ports_offset, 0); 6625 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 6626 dcore_offset + ports_offset, rw_asid); 6627 } 6628 6629 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6630 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6631 6632 /* 6633 * Decoders 6634 */ 6635 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 6636 if (prop->decoder_enabled_mask & 
BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 6637 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 6638 } 6639 } 6640 6641 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 6642 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6643 { 6644 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 6645 6646 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6647 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6648 6649 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6650 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6651 6652 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6653 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6654 6655 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6656 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6657 6658 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6659 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6660 } 6661 6662 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 6663 u32 rw_asid, u32 rw_mmu_bp) 6664 { 6665 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 6666 6667 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 6668 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 6669 } 6670 6671 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 6672 { 6673 u32 reg_base, reg_offset, reg_val = 0; 6674 6675 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 6676 6677 /* Enable MMU and configure asid for all relevant ARC regions */ 6678 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 6679 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 6680 6681 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 6682 WREG32(reg_base + reg_offset, reg_val); 6683 6684 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 6685 WREG32(reg_base + reg_offset, reg_val); 6686 6687 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 6688 WREG32(reg_base + reg_offset, reg_val); 6689 6690 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 6691 WREG32(reg_base + reg_offset, reg_val); 6692 6693 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 6694 WREG32(reg_base + reg_offset, reg_val); 6695 6696 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 6697 WREG32(reg_base + reg_offset, reg_val); 6698 6699 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 6700 WREG32(reg_base + reg_offset, reg_val); 6701 6702 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 6703 WREG32(reg_base + reg_offset, reg_val); 6704 6705 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 6706 WREG32(reg_base + reg_offset, reg_val); 6707 6708 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 6709 WREG32(reg_base + reg_offset, reg_val); 6710 6711 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 6712 WREG32(reg_base + reg_offset, reg_val); 6713 } 6714 6715 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 6716 { 6717 int i; 6718 6719 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 6720 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 6721 6722 for (i = CPU_ID_SCHED_ARC0 ; i < 
NUM_OF_ARC_FARMS_ARC ; i++) 6723 gaudi2_arc_mmu_prepare(hdev, i, asid); 6724 6725 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 6726 if (!gaudi2_is_queue_enabled(hdev, i)) 6727 continue; 6728 6729 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 6730 } 6731 6732 return 0; 6733 } 6734 6735 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 6736 { 6737 struct asic_fixed_properties *prop = &hdev->asic_prop; 6738 u32 rw_asid, offset; 6739 int rc, i; 6740 6741 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 6742 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 6743 6744 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6745 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6746 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6747 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6748 6749 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6750 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6751 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6752 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6753 6754 /* ROT */ 6755 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6756 offset = i * ROT_OFFSET; 6757 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 6758 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6759 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 6760 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 6761 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 6762 } 6763 6764 /* Shared Decoders are the last bits in the decoders mask */ 6765 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 6766 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 6767 6768 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 6769 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 6770 6771 /* arc farm arc dup eng */ 6772 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 6773 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 6774 6775 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 6776 if (rc) 6777 return rc; 6778 6779 return 0; 6780 } 6781 6782 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 6783 struct iterate_module_ctx *ctx) 6784 { 6785 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 6786 6787 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 6788 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 6789 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6790 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 6791 } 6792 6793 /* zero the MMUBP and set the ASID */ 6794 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 6795 { 6796 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6797 struct gaudi2_tpc_mmu_data tpc_mmu_data; 6798 struct iterate_module_ctx tpc_iter = { 6799 .fn = &gaudi2_tpc_mmu_prepare, 6800 .data = &tpc_mmu_data, 6801 }; 6802 int rc, i; 6803 6804 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 6805 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6806 return -EINVAL; 6807 } 6808 6809 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 6810 return 0; 6811 6812 rc = gaudi2_mmu_shared_prepare(hdev, asid); 6813 if (rc) 6814 return rc; 6815 6816 /* configure DCORE MMUs */ 6817 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6818 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6819 
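	/*
	 * Illustrative note (comment only, added for clarity): rw_asid packs
	 * the same ASID into both the read and the write AXUSER fields, so a
	 * single register write configures both directions. For example,
	 * assuming asid = 3 (an arbitrary value used only for illustration):
	 *
	 *   rw_asid = (3 << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
	 *             (3 << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT)
	 *
	 * The same packing is used by gaudi2_mmu_dcore_prepare() below.
	 */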
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
	for (i = 0 ; i < NUM_OF_DCORES ; i++)
		gaudi2_mmu_dcore_prepare(hdev, i, asid);

	return 0;
}

static inline bool is_info_event(u32 event)
{
	switch (event) {
	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:

	/* return in case of NIC status event - these events are received periodically and not as
	 * an indication of an error.
	 */
	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
		return true;
	default:
		return false;
	}
}

static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
			bool ratelimited, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (ratelimited)
		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
			gaudi2_irq_map_table[event_type].valid ?
			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
	else
		dev_err(hdev->dev, "%s: %pV\n",
			gaudi2_irq_map_table[event_type].valid ?
			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);

	va_end(args);
}

static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 memory_wrapper_idx = 0;

	ecc_address = le64_to_cpu(ecc_data->ecc_address);
	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
	memory_wrapper_idx = ecc_data->memory_wrapper_idx;

	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. 
critical %u.\n", 6876 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 6877 6878 return !!ecc_data->is_critical; 6879 } 6880 6881 /* 6882 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6883 * 6884 * @idx: the current pi/ci value 6885 * @q_len: the queue length (power of 2) 6886 * 6887 * @return the cyclically decremented index 6888 */ 6889 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 6890 { 6891 u32 mask = q_len - 1; 6892 6893 /* 6894 * modular decrement is equivalent to adding (queue_size -1) 6895 * later we take LSBs to make sure the value is in the 6896 * range [0, queue_len - 1] 6897 */ 6898 return (idx + q_len - 1) & mask; 6899 } 6900 6901 /** 6902 * gaudi2_print_sw_config_stream_data - print SW config stream data 6903 * 6904 * @hdev: pointer to the habanalabs device structure 6905 * @stream: the QMAN's stream 6906 * @qman_base: base address of QMAN registers block 6907 */ 6908 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 6909 u32 stream, u64 qman_base) 6910 { 6911 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6912 u32 cq_ptr_lo_off, size; 6913 6914 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 6915 6916 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 6917 stream * cq_ptr_lo_off; 6918 6919 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6920 6921 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6922 6923 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6924 size = RREG32(cq_tsize); 6925 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 6926 stream, cq_ptr, size); 6927 } 6928 6929 /** 6930 * gaudi2_print_last_pqes_on_err - print last PQEs on error 6931 * 6932 * @hdev: pointer to the habanalabs device structure 6933 * @qid_base: first QID of the QMAN (out of 4 streams) 6934 * @stream: the QMAN's stream 6935 * @qman_base: base address of QMAN registers block 6936 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6937 */ 6938 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 6939 u64 qman_base, bool pr_sw_conf) 6940 { 6941 u32 ci, qm_ci_stream_off; 6942 struct hl_hw_queue *q; 6943 u64 pq_ci; 6944 int i; 6945 6946 q = &hdev->kernel_queues[qid_base + stream]; 6947 6948 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 6949 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 6950 stream * qm_ci_stream_off; 6951 6952 hdev->asic_funcs->hw_queues_lock(hdev); 6953 6954 if (pr_sw_conf) 6955 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 6956 6957 ci = RREG32(pq_ci); 6958 6959 /* we should start printing form ci -1 */ 6960 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6961 6962 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6963 struct hl_bd *bd; 6964 u64 addr; 6965 u32 len; 6966 6967 bd = q->kernel_address; 6968 bd += ci; 6969 6970 len = le32_to_cpu(bd->len); 6971 /* len 0 means uninitialized entry- break */ 6972 if (!len) 6973 break; 6974 6975 addr = le64_to_cpu(bd->ptr); 6976 6977 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 6978 stream, ci, addr, len); 6979 6980 /* get previous ci, wrap if needed */ 6981 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6982 } 6983 6984 hdev->asic_funcs->hw_queues_unlock(hdev); 6985 } 6986 6987 /** 6988 * print_qman_data_on_err - extract QMAN data on 
error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 *
 * This function attempts to extract as much data as possible on a QMAN error.
 * For an upper CP, print the SW config stream data and the last 8 PQEs.
 * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
 */
static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
		return;
	}

	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);

	for (i = 0 ; i < QMAN_STREAMS ; i++)
		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
}

static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
					u64 qman_base, u32 qid_base)
{
	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);

	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS) {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
		} else {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
		}

		for (j = 0 ; j < num_error_causes ; j++)
			if (glbl_sts_val & BIT(j)) {
				gaudi2_print_event(hdev, event_type, true,
						"%s. err cause: %s", reg_desc,
						i == QMAN_STREAMS ?
						gaudi2_qman_lower_cp_error_cause[j] :
						gaudi2_qman_error_cause[j]);
				error_count++;
			}

		print_qman_data_on_err(hdev, qid_base, i, qman_base);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		goto out;

	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			gaudi2_print_event(hdev, event_type, true,
				"ARB_ERR. 
err cause: %s", 7061 gaudi2_qman_arb_error_cause[j]); 7062 error_count++; 7063 } 7064 } 7065 7066 out: 7067 return error_count; 7068 } 7069 7070 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7071 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7072 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, 7073 enum gaudi2_engine_id id, u64 *event_mask) 7074 { 7075 u32 razwi_hi, razwi_lo, razwi_xy; 7076 u16 eng_id = id; 7077 u8 rd_wr_flag; 7078 7079 if (is_write) { 7080 if (read_razwi_regs) { 7081 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7082 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7083 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7084 } else { 7085 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg); 7086 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); 7087 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); 7088 } 7089 rd_wr_flag = HL_RAZWI_WRITE; 7090 } else { 7091 if (read_razwi_regs) { 7092 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7093 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7094 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7095 } else { 7096 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg); 7097 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); 7098 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); 7099 } 7100 rd_wr_flag = HL_RAZWI_READ; 7101 } 7102 7103 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7104 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7105 7106 dev_err_ratelimited(hdev->dev, 7107 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7108 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7109 } 7110 7111 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7112 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7113 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, 7114 enum gaudi2_engine_id id, u64 *event_mask) 7115 { 7116 u32 razwi_addr, razwi_xy; 7117 u16 eng_id = id; 7118 u8 rd_wr_flag; 7119 7120 if (is_write) { 7121 if (read_razwi_regs) { 7122 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7123 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7124 } else { 7125 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg); 7126 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg); 7127 } 7128 7129 rd_wr_flag = HL_RAZWI_WRITE; 7130 } else { 7131 if (read_razwi_regs) { 7132 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7133 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7134 } else { 7135 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg); 7136 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg); 7137 } 7138 7139 rd_wr_flag = HL_RAZWI_READ; 7140 } 7141 7142 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7143 dev_err_ratelimited(hdev->dev, 7144 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", 7145 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7146 razwi_xy); 7147 } 7148 7149 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7150 enum razwi_event_sources module, u8 module_idx) 7151 { 7152 switch (module) { 7153 case RAZWI_TPC: 7154 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7155 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7156 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7157 (module_idx % NUM_OF_TPC_PER_DCORE) + 7158 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7159 7160 case RAZWI_MME: 7161 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 7162 (module_idx * ENGINE_ID_DCORE_OFFSET)); 7163 7164 case RAZWI_EDMA: 7165 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7166 (module_idx % NUM_OF_EDMA_PER_DCORE)); 7167 7168 case RAZWI_PDMA: 7169 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 7170 7171 case RAZWI_NIC: 7172 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 7173 7174 case RAZWI_DEC: 7175 if (module_idx == 8) 7176 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 7177 7178 if (module_idx == 9) 7179 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 7180 ; 7181 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7182 (module_idx % NUM_OF_DEC_PER_DCORE) + 7183 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7184 7185 case RAZWI_ROT: 7186 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 7187 7188 default: 7189 return GAUDI2_ENGINE_ID_SIZE; 7190 } 7191 } 7192 7193 /* 7194 * This function handles RR(Range register) hit events. 7195 * raised be initiators not PSOC RAZWI. 7196 */ 7197 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 7198 enum razwi_event_sources module, u8 module_idx, 7199 u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info, 7200 u64 *event_mask) 7201 { 7202 bool via_sft = false, read_razwi_regs = false; 7203 u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id; 7204 u64 rtr_mstr_if_base_addr; 7205 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 7206 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 7207 char initiator_name[64]; 7208 7209 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info) 7210 read_razwi_regs = true; 7211 7212 switch (module) { 7213 case RAZWI_TPC: 7214 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx]; 7215 sprintf(initiator_name, "TPC_%u", module_idx); 7216 break; 7217 case RAZWI_MME: 7218 sprintf(initiator_name, "MME_%u", module_idx); 7219 switch (module_sub_idx) { 7220 case MME_WAP0: 7221 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 7222 break; 7223 case MME_WAP1: 7224 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 7225 break; 7226 case MME_WRITE: 7227 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 7228 break; 7229 case MME_READ: 7230 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 7231 break; 7232 case MME_SBTE0: 7233 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 7234 break; 7235 case MME_SBTE1: 7236 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 7237 break; 7238 case MME_SBTE2: 7239 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 7240 break; 7241 case MME_SBTE3: 7242 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 7243 break; 7244 case MME_SBTE4: 7245 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 7246 break; 7247 default: 7248 return; 7249 } 7250 break; 7251 case RAZWI_EDMA: 7252 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id; 7253 dcore_id = 
gaudi2_edma_initiator_sft_id[module_idx].dcore_id; 7254 via_sft = true; 7255 sprintf(initiator_name, "EDMA_%u", module_idx); 7256 break; 7257 case RAZWI_PDMA: 7258 rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx]; 7259 sprintf(initiator_name, "PDMA_%u", module_idx); 7260 break; 7261 case RAZWI_NIC: 7262 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx]; 7263 sprintf(initiator_name, "NIC_%u", module_idx); 7264 break; 7265 case RAZWI_DEC: 7266 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx]; 7267 sprintf(initiator_name, "DEC_%u", module_idx); 7268 break; 7269 case RAZWI_ROT: 7270 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx]; 7271 sprintf(initiator_name, "ROT_%u", module_idx); 7272 break; 7273 default: 7274 return; 7275 } 7276 7277 if (!read_razwi_regs) { 7278 if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) { 7279 hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) & 7280 RAZWI_HAPPENED_AW; 7281 hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) & 7282 RAZWI_HAPPENED_AR; 7283 } else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) { 7284 lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) & 7285 RAZWI_HAPPENED_AW; 7286 lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) & 7287 RAZWI_HAPPENED_AR; 7288 } 7289 rtr_mstr_if_base_addr = 0; 7290 7291 goto dump_info; 7292 } 7293 7294 /* Find router mstr_if register base */ 7295 if (via_sft) { 7296 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + 7297 dcore_id * SFT_DCORE_OFFSET + 7298 sft_id * SFT_IF_OFFSET + 7299 RTR_MSTR_IF_OFFSET; 7300 } else { 7301 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7302 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7303 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 7304 dcore_id * DCORE_OFFSET + 7305 dcore_rtr_id * DCORE_RTR_OFFSET + 7306 RTR_MSTR_IF_OFFSET; 7307 } 7308 7309 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 7310 hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 7311 7312 hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 7313 7314 if (via_sft) { 7315 /* SFT has separate MSTR_IF for LBW, only there we can 7316 * read the LBW razwi related registers 7317 */ 7318 u64 base; 7319 7320 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET + 7321 RTR_LBW_MSTR_IF_OFFSET; 7322 7323 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7324 7325 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7326 } else { 7327 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7328 7329 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7330 } 7331 7332 dump_info: 7333 /* check if there is no RR razwi indication at all */ 7334 if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar) 7335 return; 7336 7337 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 7338 if (hbw_shrd_aw) { 7339 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7340 initiator_name, read_razwi_regs, razwi_info, 7341 eng_id, event_mask); 7342 7343 /* Clear event indication */ 7344 if (read_razwi_regs) 7345 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 7346 } 7347 7348 if (hbw_shrd_ar) { 7349 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7350 initiator_name, read_razwi_regs, razwi_info, 7351 eng_id, event_mask); 7352 7353 /* Clear event indication */ 7354 if (read_razwi_regs) 7355 WREG32(rtr_mstr_if_base_addr + 
RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 7356 } 7357 7358 if (lbw_shrd_aw) { 7359 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7360 initiator_name, read_razwi_regs, razwi_info, 7361 eng_id, event_mask); 7362 7363 /* Clear event indication */ 7364 if (read_razwi_regs) 7365 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 7366 } 7367 7368 if (lbw_shrd_ar) { 7369 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7370 initiator_name, read_razwi_regs, razwi_info, 7371 eng_id, event_mask); 7372 7373 /* Clear event indication */ 7374 if (read_razwi_regs) 7375 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar); 7376 } 7377 } 7378 7379 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) 7380 { 7381 struct asic_fixed_properties *prop = &hdev->asic_prop; 7382 u8 mod_idx, sub_mod; 7383 7384 /* check all TPCs */ 7385 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 7386 if (prop->tpc_enabled_mask & BIT(mod_idx)) 7387 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL, 7388 NULL); 7389 } 7390 7391 /* check all MMEs */ 7392 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7393 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 7394 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 7395 sub_mod, NULL, NULL); 7396 7397 /* check all EDMAs */ 7398 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7399 if (prop->edma_enabled_mask & BIT(mod_idx)) 7400 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL, 7401 NULL); 7402 7403 /* check all PDMAs */ 7404 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 7405 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL, 7406 NULL); 7407 7408 /* check all NICs */ 7409 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 7410 if (hdev->nic_ports_mask & BIT(mod_idx)) 7411 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 7412 NULL, NULL); 7413 7414 /* check all DECs */ 7415 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 7416 if (prop->decoder_enabled_mask & BIT(mod_idx)) 7417 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL, 7418 NULL); 7419 7420 /* check all ROTs */ 7421 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 7422 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL); 7423 } 7424 7425 static const char *gaudi2_get_initiators_name(u32 rtr_id) 7426 { 7427 switch (rtr_id) { 7428 case DCORE0_RTR0: 7429 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; 7430 case DCORE0_RTR1: 7431 return "TPC0/1"; 7432 case DCORE0_RTR2: 7433 return "TPC2/3"; 7434 case DCORE0_RTR3: 7435 return "TPC4/5"; 7436 case DCORE0_RTR4: 7437 return "MME0_SBTE0/1"; 7438 case DCORE0_RTR5: 7439 return "MME0_WAP0/SBTE2"; 7440 case DCORE0_RTR6: 7441 return "MME0_CTRL_WR/SBTE3"; 7442 case DCORE0_RTR7: 7443 return "MME0_WAP1/CTRL_RD/SBTE4"; 7444 case DCORE1_RTR0: 7445 return "MME1_WAP1/CTRL_RD/SBTE4"; 7446 case DCORE1_RTR1: 7447 return "MME1_CTRL_WR/SBTE3"; 7448 case DCORE1_RTR2: 7449 return "MME1_WAP0/SBTE2"; 7450 case DCORE1_RTR3: 7451 return "MME1_SBTE0/1"; 7452 case DCORE1_RTR4: 7453 return "TPC10/11"; 7454 case DCORE1_RTR5: 7455 return "TPC8/9"; 7456 case DCORE1_RTR6: 7457 return "TPC6/7"; 7458 case DCORE1_RTR7: 7459 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, 
EDMA1/3, HMMU1/3/5/7"; 7460 case DCORE2_RTR0: 7461 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; 7462 case DCORE2_RTR1: 7463 return "TPC16/17"; 7464 case DCORE2_RTR2: 7465 return "TPC14/15"; 7466 case DCORE2_RTR3: 7467 return "TPC12/13"; 7468 case DCORE2_RTR4: 7469 return "MME2_SBTE0/1"; 7470 case DCORE2_RTR5: 7471 return "MME2_WAP0/SBTE2"; 7472 case DCORE2_RTR6: 7473 return "MME2_CTRL_WR/SBTE3"; 7474 case DCORE2_RTR7: 7475 return "MME2_WAP1/CTRL_RD/SBTE4"; 7476 case DCORE3_RTR0: 7477 return "MME3_WAP1/CTRL_RD/SBTE4"; 7478 case DCORE3_RTR1: 7479 return "MME3_CTRL_WR/SBTE3"; 7480 case DCORE3_RTR2: 7481 return "MME3_WAP0/SBTE2"; 7482 case DCORE3_RTR3: 7483 return "MME3_SBTE0/1"; 7484 case DCORE3_RTR4: 7485 return "TPC18/19"; 7486 case DCORE3_RTR5: 7487 return "TPC20/21"; 7488 case DCORE3_RTR6: 7489 return "TPC22/23"; 7490 case DCORE3_RTR7: 7491 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; 7492 default: 7493 return "N/A"; 7494 } 7495 } 7496 7497 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) 7498 { 7499 switch (rtr_id) { 7500 case DCORE0_RTR0: 7501 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; 7502 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; 7503 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; 7504 engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1; 7505 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7506 engines[5] = GAUDI2_ENGINE_ID_PDMA_0; 7507 engines[6] = GAUDI2_ENGINE_ID_PDMA_1; 7508 engines[7] = GAUDI2_ENGINE_ID_PCIE; 7509 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; 7510 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; 7511 engines[10] = GAUDI2_ENGINE_ID_PSOC; 7512 return 11; 7513 7514 case DCORE0_RTR1: 7515 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; 7516 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; 7517 return 2; 7518 7519 case DCORE0_RTR2: 7520 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; 7521 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3; 7522 return 2; 7523 7524 case DCORE0_RTR3: 7525 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; 7526 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; 7527 return 2; 7528 7529 case DCORE0_RTR4: 7530 case DCORE0_RTR5: 7531 case DCORE0_RTR6: 7532 case DCORE0_RTR7: 7533 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; 7534 return 1; 7535 7536 case DCORE1_RTR0: 7537 case DCORE1_RTR1: 7538 case DCORE1_RTR2: 7539 case DCORE1_RTR3: 7540 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; 7541 return 1; 7542 7543 case DCORE1_RTR4: 7544 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; 7545 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; 7546 return 2; 7547 7548 case DCORE1_RTR5: 7549 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; 7550 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; 7551 return 2; 7552 7553 case DCORE1_RTR6: 7554 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; 7555 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; 7556 return 2; 7557 7558 case DCORE1_RTR7: 7559 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; 7560 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; 7561 engines[2] = GAUDI2_ENGINE_ID_NIC0_0; 7562 engines[3] = GAUDI2_ENGINE_ID_NIC1_0; 7563 engines[4] = GAUDI2_ENGINE_ID_NIC2_0; 7564 engines[5] = GAUDI2_ENGINE_ID_NIC3_0; 7565 engines[6] = GAUDI2_ENGINE_ID_NIC4_0; 7566 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; 7567 engines[8] = GAUDI2_ENGINE_ID_KDMA; 7568 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; 7569 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; 7570 return 11; 7571 7572 case DCORE2_RTR0: 7573 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; 7574 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1; 7575 engines[2] = GAUDI2_ENGINE_ID_NIC5_0; 7576 engines[3] = 
GAUDI2_ENGINE_ID_NIC6_0; 7577 engines[4] = GAUDI2_ENGINE_ID_NIC7_0; 7578 engines[5] = GAUDI2_ENGINE_ID_NIC8_0; 7579 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; 7580 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; 7581 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7582 return 9; 7583 7584 case DCORE2_RTR1: 7585 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; 7586 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; 7587 return 2; 7588 7589 case DCORE2_RTR2: 7590 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; 7591 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; 7592 return 2; 7593 7594 case DCORE2_RTR3: 7595 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; 7596 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; 7597 return 2; 7598 7599 case DCORE2_RTR4: 7600 case DCORE2_RTR5: 7601 case DCORE2_RTR6: 7602 case DCORE2_RTR7: 7603 engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; 7604 return 1; 7605 case DCORE3_RTR0: 7606 case DCORE3_RTR1: 7607 case DCORE3_RTR2: 7608 case DCORE3_RTR3: 7609 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; 7610 return 1; 7611 case DCORE3_RTR4: 7612 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; 7613 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; 7614 return 2; 7615 case DCORE3_RTR5: 7616 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; 7617 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; 7618 return 2; 7619 case DCORE3_RTR6: 7620 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; 7621 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; 7622 return 2; 7623 case DCORE3_RTR7: 7624 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; 7625 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; 7626 engines[2] = GAUDI2_ENGINE_ID_NIC9_0; 7627 engines[3] = GAUDI2_ENGINE_ID_NIC10_0; 7628 engines[4] = GAUDI2_ENGINE_ID_NIC11_0; 7629 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; 7630 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; 7631 engines[7] = GAUDI2_ENGINE_ID_ROT_1; 7632 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7633 return 9; 7634 default: 7635 return 0; 7636 } 7637 } 7638 7639 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7640 u64 rtr_ctrl_base_addr, bool is_write, 7641 u64 *event_mask) 7642 { 7643 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7644 u32 razwi_hi, razwi_lo; 7645 u8 rd_wr_flag; 7646 7647 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7648 7649 if (is_write) { 7650 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); 7651 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); 7652 rd_wr_flag = HL_RAZWI_WRITE; 7653 7654 /* Clear set indication */ 7655 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); 7656 } else { 7657 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); 7658 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); 7659 rd_wr_flag = HL_RAZWI_READ; 7660 7661 /* Clear set indication */ 7662 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); 7663 } 7664 7665 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, 7666 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7667 dev_err_ratelimited(hdev->dev, 7668 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", 7669 is_write ? 
"WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); 7670 7671 dev_err_ratelimited(hdev->dev, 7672 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7673 } 7674 7675 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7676 u64 rtr_ctrl_base_addr, bool is_write, 7677 u64 *event_mask) 7678 { 7679 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7680 u32 razwi_addr; 7681 u8 rd_wr_flag; 7682 7683 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7684 7685 if (is_write) { 7686 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); 7687 rd_wr_flag = HL_RAZWI_WRITE; 7688 7689 /* Clear set indication */ 7690 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); 7691 } else { 7692 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); 7693 rd_wr_flag = HL_RAZWI_READ; 7694 7695 /* Clear set indication */ 7696 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); 7697 } 7698 7699 hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, 7700 event_mask); 7701 dev_err_ratelimited(hdev->dev, 7702 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", 7703 is_write ? "WR" : "RD", rtr_id, razwi_addr); 7704 7705 dev_err_ratelimited(hdev->dev, 7706 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7707 } 7708 7709 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 7710 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 7711 { 7712 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, 7713 razwi_mask_info, razwi_intr = 0, error_count = 0; 7714 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES; 7715 u64 rtr_ctrl_base_addr; 7716 7717 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 7718 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 7719 if (!razwi_intr) 7720 return 0; 7721 } 7722 7723 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 7724 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); 7725 7726 dev_err_ratelimited(hdev->dev, 7727 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7728 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 7729 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 7730 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 7731 xy, 7732 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 7733 7734 if (xy == 0) { 7735 dev_err_ratelimited(hdev->dev, 7736 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); 7737 goto clear; 7738 } 7739 7740 /* Find router id by router coordinates */ 7741 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++) 7742 if (rtr_coordinates_to_rtr_id[rtr_id] == xy) 7743 break; 7744 7745 if (rtr_id == rtr_map_arr_len) { 7746 dev_err_ratelimited(hdev->dev, 7747 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy); 7748 goto clear; 7749 } 7750 7751 /* Find router mstr_if register base */ 7752 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7753 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7754 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET + 7755 dcore_rtr_id * DCORE_RTR_OFFSET; 7756 7757 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET); 7758 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET); 7759 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET); 7760 lbw_ar_set = 
RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); 7761 7762 if (hbw_aw_set) 7763 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7764 rtr_ctrl_base_addr, true, event_mask); 7765 7766 if (hbw_ar_set) 7767 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7768 rtr_ctrl_base_addr, false, event_mask); 7769 7770 if (lbw_aw_set) 7771 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7772 rtr_ctrl_base_addr, true, event_mask); 7773 7774 if (lbw_ar_set) 7775 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7776 rtr_ctrl_base_addr, false, event_mask); 7777 7778 error_count++; 7779 7780 clear: 7781 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 7782 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 7783 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 7784 7785 return error_count; 7786 } 7787 7788 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 7789 { 7790 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7791 7792 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 7793 7794 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 7795 if (sts_val & BIT(i)) { 7796 gaudi2_print_event(hdev, event_type, true, 7797 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 7798 sts_clr_val |= BIT(i); 7799 error_count++; 7800 } 7801 } 7802 7803 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 7804 7805 return error_count; 7806 } 7807 7808 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 7809 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 7810 { 7811 enum razwi_event_sources module; 7812 u32 error_count = 0; 7813 u64 qman_base; 7814 u8 index; 7815 7816 switch (event_type) { 7817 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 7818 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 7819 qman_base = mmDCORE0_TPC0_QM_BASE + 7820 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 7821 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 7822 module = RAZWI_TPC; 7823 break; 7824 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 7825 qman_base = mmDCORE0_TPC6_QM_BASE; 7826 module = RAZWI_TPC; 7827 break; 7828 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 7829 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 7830 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 7831 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 7832 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 7833 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 7834 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 7835 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 7836 module = RAZWI_MME; 7837 break; 7838 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 7839 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 7840 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 7841 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 7842 module = RAZWI_PDMA; 7843 break; 7844 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 7845 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 7846 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 7847 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 7848 module = RAZWI_ROT; 7849 break; 7850 default: 7851 return 0; 7852 } 7853 7854 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7855 7856 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 7857 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 7858 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 7859 error_count += _gaudi2_handle_qm_sei_err(hdev, 7860 
qman_base + NIC_QM_OFFSET, event_type); 7861 7862 /* check if RAZWI happened */ 7863 if (razwi_info) 7864 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask); 7865 7866 return error_count; 7867 } 7868 7869 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type) 7870 { 7871 u32 qid_base, error_count = 0; 7872 u64 qman_base; 7873 u8 index; 7874 7875 switch (event_type) { 7876 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 7877 index = event_type - GAUDI2_EVENT_TPC0_QM; 7878 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 7879 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7880 break; 7881 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 7882 index = event_type - GAUDI2_EVENT_TPC6_QM; 7883 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 7884 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7885 break; 7886 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM: 7887 index = event_type - GAUDI2_EVENT_TPC12_QM; 7888 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 7889 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7890 break; 7891 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM: 7892 index = event_type - GAUDI2_EVENT_TPC18_QM; 7893 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 7894 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7895 break; 7896 case GAUDI2_EVENT_TPC24_QM: 7897 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 7898 qman_base = mmDCORE0_TPC6_QM_BASE; 7899 break; 7900 case GAUDI2_EVENT_MME0_QM: 7901 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 7902 qman_base = mmDCORE0_MME_QM_BASE; 7903 break; 7904 case GAUDI2_EVENT_MME1_QM: 7905 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 7906 qman_base = mmDCORE1_MME_QM_BASE; 7907 break; 7908 case GAUDI2_EVENT_MME2_QM: 7909 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 7910 qman_base = mmDCORE2_MME_QM_BASE; 7911 break; 7912 case GAUDI2_EVENT_MME3_QM: 7913 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 7914 qman_base = mmDCORE3_MME_QM_BASE; 7915 break; 7916 case GAUDI2_EVENT_HDMA0_QM: 7917 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 7918 qman_base = mmDCORE0_EDMA0_QM_BASE; 7919 break; 7920 case GAUDI2_EVENT_HDMA1_QM: 7921 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 7922 qman_base = mmDCORE0_EDMA1_QM_BASE; 7923 break; 7924 case GAUDI2_EVENT_HDMA2_QM: 7925 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 7926 qman_base = mmDCORE1_EDMA0_QM_BASE; 7927 break; 7928 case GAUDI2_EVENT_HDMA3_QM: 7929 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 7930 qman_base = mmDCORE1_EDMA1_QM_BASE; 7931 break; 7932 case GAUDI2_EVENT_HDMA4_QM: 7933 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 7934 qman_base = mmDCORE2_EDMA0_QM_BASE; 7935 break; 7936 case GAUDI2_EVENT_HDMA5_QM: 7937 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 7938 qman_base = mmDCORE2_EDMA1_QM_BASE; 7939 break; 7940 case GAUDI2_EVENT_HDMA6_QM: 7941 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 7942 qman_base = mmDCORE3_EDMA0_QM_BASE; 7943 break; 7944 case GAUDI2_EVENT_HDMA7_QM: 7945 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 7946 qman_base = mmDCORE3_EDMA1_QM_BASE; 7947 break; 7948 case GAUDI2_EVENT_PDMA0_QM: 7949 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 7950 qman_base = mmPDMA0_QM_BASE; 7951 break; 7952 case GAUDI2_EVENT_PDMA1_QM: 7953 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 7954 qman_base = mmPDMA1_QM_BASE; 7955 break; 7956 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 7957 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 7958 qman_base = mmROT0_QM_BASE; 7959 break; 
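	/*
	 * Illustrative note (comment only, added for clarity): each *_QM error
	 * event in this switch is mapped to two values - qid_base, the queue ID
	 * of the QMAN's first stream (used to locate the kernel queues when
	 * dumping PQEs), and qman_base, the QMAN register block base (used to
	 * read GLBL_ERR_STS and ARB_ERR_CAUSE). For example, for the
	 * GAUDI2_EVENT_TPC12_QM..TPC17_QM range above, the index selects a TPC
	 * inside DCORE2, so qid_base advances in steps of QMAN_STREAMS while
	 * qman_base advances in steps of DCORE_TPC_OFFSET.
	 */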
7960 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 7961 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 7962 qman_base = mmROT1_QM_BASE; 7963 break; 7964 default: 7965 return 0; 7966 } 7967 7968 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 7969 7970 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 7971 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) 7972 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7973 7974 return error_count; 7975 } 7976 7977 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 7978 { 7979 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7980 7981 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); 7982 7983 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 7984 if (sts_val & BIT(i)) { 7985 gaudi2_print_event(hdev, event_type, true, 7986 "err cause: %s", gaudi2_arc_sei_error_cause[i]); 7987 sts_clr_val |= BIT(i); 7988 error_count++; 7989 } 7990 } 7991 7992 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); 7993 7994 return error_count; 7995 } 7996 7997 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 7998 { 7999 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8000 8001 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 8002 8003 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 8004 if (sts_val & BIT(i)) { 8005 gaudi2_print_event(hdev, event_type, true, 8006 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 8007 sts_clr_val |= BIT(i); 8008 error_count++; 8009 } 8010 } 8011 8012 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 8013 8014 return error_count; 8015 } 8016 8017 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 8018 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8019 u64 *event_mask) 8020 { 8021 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8022 u32 error_count = 0; 8023 int i; 8024 8025 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 8026 if (intr_cause_data & BIT(i)) { 8027 gaudi2_print_event(hdev, event_type, true, 8028 "err cause: %s", guadi2_rot_error_cause[i]); 8029 error_count++; 8030 } 8031 8032 /* check if RAZWI happened */ 8033 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, 8034 &razwi_with_intr_cause->razwi_info, event_mask); 8035 8036 return error_count; 8037 } 8038 8039 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 8040 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8041 u64 *event_mask) 8042 { 8043 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8044 u32 error_count = 0; 8045 int i; 8046 8047 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 8048 if (intr_cause_data & BIT(i)) { 8049 gaudi2_print_event(hdev, event_type, true, 8050 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 8051 error_count++; 8052 } 8053 8054 /* check if RAZWI happened */ 8055 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, 8056 &razwi_with_intr_cause->razwi_info, event_mask); 8057 8058 return error_count; 8059 } 8060 8061 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8062 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8063 { 8064 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8065 int i; 8066 8067 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8068 /* DCORE DEC */ 8069 sts_addr = 
mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8070 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8071 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8072 else 8073 /* PCIE DEC */ 8074 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8075 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8076 8077 sts_val = RREG32(sts_addr); 8078 8079 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8080 if (sts_val & BIT(i)) { 8081 gaudi2_print_event(hdev, event_type, true, 8082 "err cause: %s", gaudi2_dec_error_cause[i]); 8083 sts_clr_val |= BIT(i); 8084 error_count++; 8085 } 8086 } 8087 8088 /* check if RAZWI happened */ 8089 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info, 8090 event_mask); 8091 8092 /* Write 1 clear errors */ 8093 WREG32(sts_addr, sts_clr_val); 8094 8095 return error_count; 8096 } 8097 8098 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8099 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8100 { 8101 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8102 int i; 8103 8104 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index; 8105 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index; 8106 8107 sts_val = RREG32(sts_addr); 8108 8109 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) { 8110 if (sts_val & BIT(i)) { 8111 gaudi2_print_event(hdev, event_type, true, 8112 "err cause: %s", guadi2_mme_error_cause[i]); 8113 sts_clr_val |= BIT(i); 8114 error_count++; 8115 } 8116 } 8117 8118 /* check if RAZWI happened */ 8119 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) 8120 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info, 8121 event_mask); 8122 8123 WREG32(sts_clr_addr, sts_clr_val); 8124 8125 return error_count; 8126 } 8127 8128 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, 8129 u64 intr_cause_data) 8130 { 8131 int i, error_count = 0; 8132 8133 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) 8134 if (intr_cause_data & BIT(i)) { 8135 gaudi2_print_event(hdev, event_type, true, 8136 "err cause: %s", guadi2_mme_sbte_error_cause[i]); 8137 error_count++; 8138 } 8139 8140 return error_count; 8141 } 8142 8143 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8144 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8145 { 8146 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8147 int i; 8148 8149 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index; 8150 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index; 8151 8152 sts_val = RREG32(sts_addr); 8153 8154 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) { 8155 if (sts_val & BIT(i)) { 8156 gaudi2_print_event(hdev, event_type, true, 8157 "err cause: %s", guadi2_mme_wap_error_cause[i]); 8158 sts_clr_val |= BIT(i); 8159 error_count++; 8160 } 8161 } 8162 8163 /* check if RAZWI happened on WAP0/1 */ 8164 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info, 8165 event_mask); 8166 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info, 8167 event_mask); 8168 8169 WREG32(sts_clr_addr, sts_clr_val); 8170 8171 return error_count; 8172 } 8173 8174 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, 8175 u64 intr_cause_data) 8176 { 8177 u32 error_count = 0; 8178 int i; 8179 8180 /* If an AXI read or write error is received, an error 
is reported and an
	 * interrupt message is sent. Due to a HW erratum, when reading the cause
	 * register of the KDMA engine, the reported error is always HBW, even if
	 * the actual error was caused by an LBW KDMA transaction.
	 */
	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
			error_count++;
		}

	return error_count;
}

static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
			error_count++;
		}

	return error_count;
}

static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
{
	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}
}

static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data, u64 *event_mask)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
		if (!(intr_cause_data & BIT_ULL(i)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
		error_count++;

		switch (intr_cause_data & BIT_ULL(i)) {
		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
			break;
		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
			break;
		}
	}

	return error_count;
}

static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
		if (intr_cause_data & BIT_ULL(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
			error_count++;
8282 } 8283 } 8284 8285 return error_count; 8286 } 8287 8288 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 8289 { 8290 u32 error_count = 0; 8291 int i; 8292 8293 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8294 if (intr_cause_data & BIT_ULL(i)) { 8295 gaudi2_print_event(hdev, event_type, true, 8296 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8297 error_count++; 8298 } 8299 } 8300 8301 return error_count; 8302 } 8303 8304 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8305 u64 *event_mask) 8306 { 8307 u32 valid, val; 8308 u64 addr; 8309 8310 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8311 8312 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8313 return; 8314 8315 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8316 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8317 addr <<= 32; 8318 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8319 8320 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", 8321 is_pmmu ? "PMMU" : "HMMU", addr); 8322 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8323 8324 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); 8325 } 8326 8327 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8328 { 8329 u32 valid, val; 8330 u64 addr; 8331 8332 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8333 8334 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8335 return; 8336 8337 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8338 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8339 addr <<= 32; 8340 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8341 8342 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8343 is_pmmu ? 
"PMMU" : "HMMU", addr); 8344 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); 8345 } 8346 8347 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8348 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8349 { 8350 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8351 int i; 8352 8353 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8354 8355 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8356 if (spi_sei_cause & BIT(i)) { 8357 gaudi2_print_event(hdev, event_type, true, 8358 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8359 8360 if (i == 0) 8361 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8362 else if (i == 1) 8363 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8364 8365 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8366 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8367 8368 error_count++; 8369 } 8370 } 8371 8372 /* Clear cause */ 8373 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8374 8375 /* Clear interrupt */ 8376 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8377 8378 return error_count; 8379 } 8380 8381 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 8382 { 8383 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 8384 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 8385 int i; 8386 8387 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 8388 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 8389 8390 sei_cause_val = RREG32(sei_cause_addr); 8391 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 8392 cq_intr_val = RREG32(cq_intr_addr); 8393 8394 /* SEI interrupt */ 8395 if (sei_cause_cause) { 8396 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 8397 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 8398 sei_cause_val); 8399 8400 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 8401 if (!(sei_cause_cause & BIT(i))) 8402 continue; 8403 8404 gaudi2_print_event(hdev, event_type, true, 8405 "err cause: %s. %s: 0x%X\n", 8406 gaudi2_sm_sei_cause[i].cause_name, 8407 gaudi2_sm_sei_cause[i].log_name, 8408 sei_cause_log & gaudi2_sm_sei_cause[i].log_mask); 8409 error_count++; 8410 break; 8411 } 8412 8413 /* Clear SM_SEI_CAUSE */ 8414 WREG32(sei_cause_addr, 0); 8415 } 8416 8417 /* CQ interrupt */ 8418 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 8419 cq_intr_queue_index = 8420 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 8421 cq_intr_val); 8422 8423 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 8424 sm_index, cq_intr_queue_index); 8425 error_count++; 8426 8427 /* Clear CQ_INTR */ 8428 WREG32(cq_intr_addr, 0); 8429 } 8430 8431 return error_count; 8432 } 8433 8434 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8435 { 8436 bool is_pmmu = false; 8437 u32 error_count = 0; 8438 u64 mmu_base; 8439 u8 index; 8440 8441 switch (event_type) { 8442 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 8443 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; 8444 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8445 break; 8446 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 8447 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); 8448 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8449 break; 8450 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 8451 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; 8452 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8453 break; 8454 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 8455 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); 8456 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8457 break; 8458 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 8459 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; 8460 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8461 break; 8462 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 8463 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); 8464 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8465 break; 8466 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 8467 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; 8468 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8469 break; 8470 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 8471 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); 8472 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8473 break; 8474 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 8475 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8476 is_pmmu = true; 8477 mmu_base = mmPMMU_HBW_MMU_BASE; 8478 break; 8479 default: 8480 return 0; 8481 } 8482 8483 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 8484 is_pmmu, event_mask); 8485 8486 return error_count; 8487 } 8488 8489 8490 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 8491 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 8492 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 8493 { 8494 u32 addr, beat, beat_shift; 8495 bool rc = false; 8496 8497 dev_err_ratelimited(hdev->dev, 8498 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 8499 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 8500 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 8501 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 8502 8503 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 8504 dev_err_ratelimited(hdev->dev, 8505 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 8506 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 8507 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 8508 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 8509 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 8510 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 8511 8512 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 8513 for (beat = 0 ; beat < 4 ; beat++) { 8514 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8515 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 8516 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 8517 beat, 8518 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8519 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 8520 8521 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8522 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 8523 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n", 8524 beat, 8525 
le32_to_cpu(rd_err_data->dbg_rd_err_dm),
					le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
			rc |= true;
		}

		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
			dev_err_ratelimited(hdev->dev,
					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
					beat,
					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
			rc |= true;
		}

		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
	}

	return rc;
}

static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
				derr & 0x3, derr & 0xc);

	/* JIRA H6-3286 - the following prints may not be valid */
	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
		dev_err_ratelimited(hdev->dev,
				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
				i,
				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
	}
}

static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
	__le32 *col_cmd = ca_par_err_data->dbg_col;
	__le16 *row_cmd = ca_par_err_data->dbg_row;
	u32 i;

	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
					le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
					le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
}

/* Returns true if hard reset is needed or false otherwise */
static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
					struct hl_eq_hbm_sei_data *sei_data)
{
	bool require_hard_reset = false;
	u32 hbm_id, mc_id, cause_idx;

	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;

	cause_idx = sei_data->hdr.sei_cause;
	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
		gaudi2_print_event(hdev, event_type, true,
			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
		return true;
	}

	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). 
Error cause: %s\n", 8614 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 8615 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8616 hbm_mc_sei_cause[cause_idx]); 8617 8618 /* Print error-specific info */ 8619 switch (cause_idx) { 8620 case HBM_SEI_CATTRIP: 8621 require_hard_reset = true; 8622 break; 8623 8624 case HBM_SEI_CMD_PARITY_EVEN: 8625 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 8626 le32_to_cpu(sei_data->hdr.cnt)); 8627 require_hard_reset = true; 8628 break; 8629 8630 case HBM_SEI_CMD_PARITY_ODD: 8631 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 8632 le32_to_cpu(sei_data->hdr.cnt)); 8633 require_hard_reset = true; 8634 break; 8635 8636 case HBM_SEI_WRITE_DATA_PARITY_ERR: 8637 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 8638 le32_to_cpu(sei_data->hdr.cnt)); 8639 require_hard_reset = true; 8640 break; 8641 8642 case HBM_SEI_READ_ERR: 8643 /* Unlike other SEI events, read error requires further processing of the 8644 * raw data in order to determine the root cause. 8645 */ 8646 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 8647 &sei_data->read_err_info, 8648 le32_to_cpu(sei_data->hdr.cnt)); 8649 break; 8650 8651 default: 8652 break; 8653 } 8654 8655 require_hard_reset |= !!sei_data->hdr.is_critical; 8656 8657 return require_hard_reset; 8658 } 8659 8660 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 8661 u64 intr_cause_data) 8662 { 8663 if (intr_cause_data) { 8664 gaudi2_print_event(hdev, event_type, true, 8665 "temperature error cause: %#llx", intr_cause_data); 8666 return 1; 8667 } 8668 8669 return 0; 8670 } 8671 8672 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 8673 { 8674 u32 i, error_count = 0; 8675 8676 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 8677 if (intr_cause_data & hbm_mc_spi[i].mask) { 8678 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 8679 hbm_mc_spi[i].cause); 8680 error_count++; 8681 } 8682 8683 return error_count; 8684 } 8685 8686 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8687 { 8688 ktime_t zero_time = ktime_set(0, 0); 8689 8690 mutex_lock(&hdev->clk_throttling.lock); 8691 8692 switch (event_type) { 8693 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 8694 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 8695 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 8696 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 8697 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 8698 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 8699 break; 8700 8701 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 8702 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 8703 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 8704 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 8705 break; 8706 8707 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 8708 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 8709 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 8710 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 8711 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 8712 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8713 dev_info_ratelimited(hdev->dev, "Clock throttling due to 
overheating\n"); 8714 break; 8715 8716 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 8717 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 8718 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 8719 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8720 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 8721 break; 8722 8723 default: 8724 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 8725 break; 8726 } 8727 8728 mutex_unlock(&hdev->clk_throttling.lock); 8729 } 8730 8731 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 8732 struct cpucp_pkt_sync_err *sync_err) 8733 { 8734 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8735 8736 gaudi2_print_event(hdev, event_type, false, 8737 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8738 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 8739 q->pi, atomic_read(&q->ci)); 8740 } 8741 8742 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 8743 { 8744 u32 p2p_intr, msix_gw_intr, error_count = 0; 8745 8746 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 8747 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 8748 8749 if (p2p_intr) { 8750 gaudi2_print_event(hdev, event_type, true, 8751 "pcie p2p transaction terminated due to security, req_id(0x%x)\n", 8752 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 8753 8754 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 8755 error_count++; 8756 } 8757 8758 if (msix_gw_intr) { 8759 gaudi2_print_event(hdev, event_type, true, 8760 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n", 8761 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 8762 8763 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 8764 error_count++; 8765 } 8766 8767 return error_count; 8768 } 8769 8770 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 8771 struct hl_eq_pcie_drain_ind_data *drain_data) 8772 { 8773 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 8774 8775 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 8776 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 8777 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 8778 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 8779 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 8780 8781 if (cause & BIT_ULL(0)) { 8782 dev_err_ratelimited(hdev->dev, 8783 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 8784 !!lbw_rd, !!lbw_wr); 8785 error_count++; 8786 } 8787 8788 if (cause & BIT_ULL(1)) { 8789 dev_err_ratelimited(hdev->dev, 8790 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 8791 hbw_rd, hbw_wr); 8792 error_count++; 8793 } 8794 8795 return error_count; 8796 } 8797 8798 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 8799 { 8800 u32 error_count = 0; 8801 int i; 8802 8803 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 8804 if (intr_cause_data & BIT_ULL(i)) { 8805 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 8806 gaudi2_psoc_axi_drain_interrupts_cause[i]); 8807 error_count++; 8808 } 8809 } 8810 8811 return error_count; 8812 } 8813 8814 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 8815 struct cpucp_pkt_sync_err *sync_err) 8816 { 8817 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8818 8819 gaudi2_print_event(hdev, event_type, false, 8820 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8821 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, 
atomic_read(&q->ci)); 8822 } 8823 8824 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 8825 struct hl_eq_engine_arc_intr_data *data) 8826 { 8827 struct hl_engine_arc_dccm_queue_full_irq *q; 8828 u32 intr_type, engine_id; 8829 u64 payload; 8830 8831 intr_type = le32_to_cpu(data->intr_type); 8832 engine_id = le32_to_cpu(data->engine_id); 8833 payload = le64_to_cpu(data->payload); 8834 8835 switch (intr_type) { 8836 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 8837 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 8838 8839 gaudi2_print_event(hdev, event_type, true, 8840 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n", 8841 engine_id, intr_type, q->queue_index); 8842 return 1; 8843 default: 8844 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n"); 8845 return 0; 8846 } 8847 } 8848 8849 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 8850 { 8851 struct gaudi2_device *gaudi2 = hdev->asic_specific; 8852 bool reset_required = false, is_critical = false; 8853 u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0; 8854 u64 event_mask = 0; 8855 u16 event_type; 8856 8857 ctl = le32_to_cpu(eq_entry->hdr.ctl); 8858 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 8859 8860 if (event_type >= GAUDI2_EVENT_SIZE) { 8861 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 8862 event_type, GAUDI2_EVENT_SIZE - 1); 8863 return; 8864 } 8865 8866 gaudi2->events_stat[event_type]++; 8867 gaudi2->events_stat_aggregate[event_type]++; 8868 8869 switch (event_type) { 8870 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 8871 fallthrough; 8872 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 8873 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8874 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8875 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 8876 is_critical = eq_entry->ecc_data.is_critical; 8877 error_count++; 8878 break; 8879 8880 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 8881 fallthrough; 8882 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8883 fallthrough; 8884 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 8885 error_count = gaudi2_handle_qman_err(hdev, event_type); 8886 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8887 break; 8888 8889 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 8890 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8891 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 8892 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8893 break; 8894 8895 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 8896 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 8897 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8898 break; 8899 8900 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8901 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8902 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8903 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, 8904 &eq_entry->razwi_info, &event_mask); 8905 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8906 break; 8907 8908 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 8909 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 8910 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8911 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 8912 &eq_entry->razwi_with_intr_cause, &event_mask); 8913 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 8914 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8915 break; 8916 8917 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 8918 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 8919 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8920 &eq_entry->razwi_with_intr_cause, &event_mask); 8921 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 8922 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8923 break; 8924 8925 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 8926 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 8927 error_count = gaudi2_handle_dec_err(hdev, index, event_type, 8928 &eq_entry->razwi_info, &event_mask); 8929 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8930 break; 8931 8932 case GAUDI2_EVENT_TPC0_KERNEL_ERR: 8933 case GAUDI2_EVENT_TPC1_KERNEL_ERR: 8934 case GAUDI2_EVENT_TPC2_KERNEL_ERR: 8935 case GAUDI2_EVENT_TPC3_KERNEL_ERR: 8936 case GAUDI2_EVENT_TPC4_KERNEL_ERR: 8937 case GAUDI2_EVENT_TPC5_KERNEL_ERR: 8938 case GAUDI2_EVENT_TPC6_KERNEL_ERR: 8939 case GAUDI2_EVENT_TPC7_KERNEL_ERR: 8940 case GAUDI2_EVENT_TPC8_KERNEL_ERR: 8941 case GAUDI2_EVENT_TPC9_KERNEL_ERR: 8942 case GAUDI2_EVENT_TPC10_KERNEL_ERR: 8943 case GAUDI2_EVENT_TPC11_KERNEL_ERR: 8944 case GAUDI2_EVENT_TPC12_KERNEL_ERR: 8945 case GAUDI2_EVENT_TPC13_KERNEL_ERR: 8946 case GAUDI2_EVENT_TPC14_KERNEL_ERR: 8947 case GAUDI2_EVENT_TPC15_KERNEL_ERR: 8948 case GAUDI2_EVENT_TPC16_KERNEL_ERR: 8949 case GAUDI2_EVENT_TPC17_KERNEL_ERR: 8950 case GAUDI2_EVENT_TPC18_KERNEL_ERR: 8951 case GAUDI2_EVENT_TPC19_KERNEL_ERR: 8952 case GAUDI2_EVENT_TPC20_KERNEL_ERR: 8953 case GAUDI2_EVENT_TPC21_KERNEL_ERR: 8954 case GAUDI2_EVENT_TPC22_KERNEL_ERR: 8955 case GAUDI2_EVENT_TPC23_KERNEL_ERR: 8956 case GAUDI2_EVENT_TPC24_KERNEL_ERR: 8957 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 8958 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 8959 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8960 &eq_entry->razwi_with_intr_cause, &event_mask); 8961 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8962 break; 8963 8964 case GAUDI2_EVENT_DEC0_SPI: 8965 case GAUDI2_EVENT_DEC1_SPI: 8966 case GAUDI2_EVENT_DEC2_SPI: 8967 case GAUDI2_EVENT_DEC3_SPI: 8968 case GAUDI2_EVENT_DEC4_SPI: 8969 case GAUDI2_EVENT_DEC5_SPI: 8970 case GAUDI2_EVENT_DEC6_SPI: 8971 case GAUDI2_EVENT_DEC7_SPI: 8972 case GAUDI2_EVENT_DEC8_SPI: 8973 case GAUDI2_EVENT_DEC9_SPI: 8974 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 8975 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 8976 error_count = gaudi2_handle_dec_err(hdev, index, event_type, 8977 &eq_entry->razwi_info, &event_mask); 8978 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8979 break; 8980 8981 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 8982 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 8983 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 8984 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 8985 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 8986 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 8987 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 8988 error_count = gaudi2_handle_mme_err(hdev, index, event_type, 8989 &eq_entry->razwi_info, &event_mask); 8990 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 8991 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8992 break; 8993 8994 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: 8995 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR: 8996 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR: 8997 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR: 8998 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / 8999 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 9000 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 9001 error_count = gaudi2_handle_mme_err(hdev, index, event_type, 9002 &eq_entry->razwi_info, &event_mask); 9003 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9004 break; 9005 9006 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: 9007 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: 9008 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: 9009 case 
GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: 9010 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / 9011 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 9012 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 9013 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, 9014 &eq_entry->razwi_info, &event_mask); 9015 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9016 break; 9017 9018 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 9019 case GAUDI2_EVENT_KDMA0_CORE: 9020 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 9021 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9022 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9023 break; 9024 9025 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: 9026 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 9027 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9028 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9029 break; 9030 9031 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 9032 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 9033 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 9034 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9035 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9036 break; 9037 9038 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9039 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9040 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9041 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9042 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 9043 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9044 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9045 break; 9046 9047 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: 9048 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 9049 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9050 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9051 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9052 break; 9053 9054 case GAUDI2_EVENT_PMMU_FATAL_0: 9055 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 9056 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9057 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9058 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9059 break; 9060 9061 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 9062 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 9063 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9064 break; 9065 9066 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9067 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9068 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9069 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9070 reset_required = true; 9071 } 9072 error_count++; 9073 break; 9074 9075 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9076 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9077 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9078 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9079 break; 9080 9081 case GAUDI2_EVENT_HBM0_MC0_SPI ... 
GAUDI2_EVENT_HBM5_MC1_SPI: 9082 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9083 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9084 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9085 break; 9086 9087 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9088 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9089 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9090 break; 9091 9092 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9093 error_count = gaudi2_handle_psoc_drain(hdev, 9094 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9095 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9096 break; 9097 9098 case GAUDI2_EVENT_CPU_AXI_ECC: 9099 error_count = GAUDI2_NA_EVENT_CAUSE; 9100 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9101 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9102 break; 9103 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9104 error_count = GAUDI2_NA_EVENT_CAUSE; 9105 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9106 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9107 break; 9108 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9109 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9110 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9111 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9112 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9113 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9114 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9115 break; 9116 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: 9117 error_count = GAUDI2_NA_EVENT_CAUSE; 9118 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9119 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9120 break; 9121 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9122 error_count = GAUDI2_NA_EVENT_CAUSE; 9123 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9124 break; 9125 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9126 error_count = GAUDI2_NA_EVENT_CAUSE; 9127 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9128 break; 9129 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9130 error_count = GAUDI2_NA_EVENT_CAUSE; 9131 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9132 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9133 break; 9134 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9135 error_count = GAUDI2_NA_EVENT_CAUSE; 9136 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9137 break; 9138 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9139 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9140 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9141 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9142 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9143 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9144 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9145 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9146 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9147 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9148 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9149 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9150 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9151 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9152 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9153 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9154 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9155 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9156 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9157 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9158 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9159 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9160 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9161 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9162 case GAUDI2_EVENT_TPC24_BMON_SPMU: 9163 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9164 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9165 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9166 case 
GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9167 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9168 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9169 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9170 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9171 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9172 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9173 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9174 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9175 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9176 fallthrough; 9177 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9178 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9179 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9180 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9181 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9182 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9183 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9184 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9185 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9186 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9187 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU: 9188 error_count = GAUDI2_NA_EVENT_CAUSE; 9189 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9190 break; 9191 9192 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9193 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9194 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9195 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9196 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9197 error_count = GAUDI2_NA_EVENT_CAUSE; 9198 break; 9199 9200 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9201 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9202 error_count = GAUDI2_NA_EVENT_CAUSE; 9203 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9204 break; 9205 9206 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9207 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9208 error_count = GAUDI2_NA_EVENT_CAUSE; 9209 /* Do nothing- FW will handle it */ 9210 break; 9211 9212 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9213 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9214 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9215 break; 9216 9217 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9218 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9219 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9220 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9221 break; 9222 9223 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... 
GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9224 error_count = GAUDI2_NA_EVENT_CAUSE; 9225 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9226 break; 9227 9228 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9229 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9230 le64_to_cpu(eq_entry->data[0])); 9231 error_count = GAUDI2_NA_EVENT_CAUSE; 9232 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9233 break; 9234 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9235 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9236 le64_to_cpu(eq_entry->data[0])); 9237 error_count = GAUDI2_NA_EVENT_CAUSE; 9238 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9239 break; 9240 9241 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9242 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9243 error_count = GAUDI2_NA_EVENT_CAUSE; 9244 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9245 break; 9246 9247 case GAUDI2_EVENT_ARC_DCCM_FULL: 9248 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9249 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9250 break; 9251 9252 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9253 case GAUDI2_EVENT_DEV_RESET_REQ: 9254 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9255 error_count = GAUDI2_NA_EVENT_CAUSE; 9256 is_critical = true; 9257 break; 9258 9259 default: 9260 if (gaudi2_irq_map_table[event_type].valid) { 9261 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9262 event_type); 9263 error_count = GAUDI2_NA_EVENT_CAUSE; 9264 } 9265 } 9266 9267 /* Make sure to dump an error in case no error cause was printed so far. 9268 * Note that although we have counted the errors, we use this number as 9269 * a boolean. 9270 */ 9271 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type)) 9272 gaudi2_print_event(hdev, event_type, true, "%d", event_type); 9273 else if (error_count == 0) 9274 gaudi2_print_event(hdev, event_type, true, 9275 "No error cause for H/W event %u\n", event_type); 9276 9277 if ((gaudi2_irq_map_table[event_type].reset || reset_required) && 9278 (hdev->hard_reset_on_fw_events || 9279 (hdev->asic_prop.fw_security_enabled && is_critical))) 9280 goto reset_device; 9281 9282 /* Send unmask irq only for interrupts not classified as MSG */ 9283 if (!gaudi2_irq_map_table[event_type].msg) 9284 hl_fw_unmask_irq(hdev, event_type); 9285 9286 if (event_mask) 9287 hl_notifier_event_send_all(hdev, event_mask); 9288 9289 return; 9290 9291 reset_device: 9292 if (hdev->asic_prop.fw_security_enabled && is_critical) { 9293 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW; 9294 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 9295 } else { 9296 reset_flags |= HL_DRV_RESET_DELAY; 9297 } 9298 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 9299 hl_device_cond_reset(hdev, reset_flags, event_mask); 9300 } 9301 9302 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, 9303 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, 9304 u32 hw_queue_id, u32 size, u64 addr, u32 val) 9305 { 9306 u32 ctl, pkt_size; 9307 int rc = 0; 9308 9309 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 9310 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 9311 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1); 9312 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1); 9313 9314 lin_dma_pkt->ctl = cpu_to_le32(ctl); 9315 lin_dma_pkt->src_addr = cpu_to_le64(val); 9316 lin_dma_pkt->dst_addr = cpu_to_le64(addr); 9317 lin_dma_pkt->tsize = cpu_to_le32(size); 9318 9319 pkt_size = 
sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);

	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
						&pkt_dma_addr, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
	 * so save only the first one in order to restore it later.
	 * Also set the SOB address on all EDMA cores for completion.
	 * Set the QM as trusted to allow it to access a physical address with MMU bypass.
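	 *
	 * Illustrative example (hypothetical sizes, not taken from the driver):
	 * scrubbing a 96 GB DRAM region issues 48 LIN_DMA packets of 2 GB each
	 * (chunks are capped at SZ_2G below), spread round-robin over the enabled
	 * EDMA queues, while a 64 MB SRAM region needs a single packet. Each
	 * packet's write-completion increments the selected SOB, so the code
	 * further down polls until the SOB value equals dma_num.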
9370 */ 9371 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 9372 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9373 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9374 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9375 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9376 9377 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9378 continue; 9379 9380 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 9381 edma_offset, mmubp); 9382 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 9383 lower_32_bits(comp_addr)); 9384 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 9385 upper_32_bits(comp_addr)); 9386 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 9387 comp_val); 9388 gaudi2_qman_set_test_mode(hdev, 9389 edma_queues_id[dcore] + 4 * edma_idx, true); 9390 } 9391 } 9392 9393 WREG32(sob_addr, 0); 9394 9395 while (cur_addr < end_addr) { 9396 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9397 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9398 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9399 9400 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9401 continue; 9402 9403 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 9404 9405 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 9406 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 9407 pkt_dma_addr + dma_num * pkt_size, 9408 edma_queues_id[dcore] + edma_idx * 4, 9409 chunk_size, cur_addr, val); 9410 if (rc) 9411 goto end; 9412 9413 dma_num++; 9414 cur_addr += chunk_size; 9415 if (cur_addr == end_addr) 9416 break; 9417 } 9418 } 9419 } 9420 9421 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 9422 if (rc) { 9423 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 9424 goto end; 9425 } 9426 end: 9427 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9428 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9429 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9430 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9431 9432 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9433 continue; 9434 9435 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 9436 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 9437 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 9438 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 9439 gaudi2_qman_set_test_mode(hdev, 9440 edma_queues_id[dcore] + 4 * edma_idx, false); 9441 } 9442 } 9443 9444 WREG32(sob_addr, 0); 9445 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 9446 9447 return rc; 9448 } 9449 9450 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val) 9451 { 9452 int rc; 9453 struct asic_fixed_properties *prop = &hdev->asic_prop; 9454 u64 size = prop->dram_end_address - prop->dram_user_base_address; 9455 9456 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 9457 9458 if (rc) 9459 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 9460 prop->dram_user_base_address, size); 9461 return rc; 9462 } 9463 9464 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 9465 { 9466 int rc; 9467 struct asic_fixed_properties *prop = &hdev->asic_prop; 9468 u64 val = hdev->memory_scrub_val; 9469 u64 addr, size; 9470 9471 if (!hdev->memory_scrub) 9472 return 0; 9473 9474 /* scrub SRAM */ 9475 addr = 
prop->sram_user_base_address; 9476 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 9477 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 9478 addr, addr + size, val); 9479 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 9480 if (rc) { 9481 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 9482 return rc; 9483 } 9484 9485 /* scrub DRAM */ 9486 rc = gaudi2_scrub_device_dram(hdev, val); 9487 if (rc) { 9488 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 9489 return rc; 9490 } 9491 return 0; 9492 } 9493 9494 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 9495 { 9496 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 9497 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 9498 u32 val, size, offset; 9499 int dcore_id; 9500 9501 offset = hdev->asic_prop.first_available_cq[0] * 4; 9502 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 9503 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 9504 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 9505 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 9506 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 9507 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 9508 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 9509 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 9510 9511 /* memset dcore0 CQ registers */ 9512 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9513 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9514 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9515 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9516 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9517 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9518 9519 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 9520 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 9521 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 9522 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 9523 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 9524 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 9525 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 9526 9527 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9528 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9529 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9530 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9531 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9532 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9533 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9534 9535 cq_lbw_l_addr += DCORE_OFFSET; 9536 cq_lbw_h_addr += DCORE_OFFSET; 9537 cq_lbw_data_addr += DCORE_OFFSET; 9538 cq_base_l_addr += DCORE_OFFSET; 9539 cq_base_h_addr += DCORE_OFFSET; 9540 cq_size_addr += DCORE_OFFSET; 9541 } 9542 9543 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 9544 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 9545 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT; 9546 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset); 9547 9548 /* memset dcore0 monitors */ 9549 gaudi2_memset_device_lbw(hdev, addr, size, val); 9550 9551 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset; 9552 gaudi2_memset_device_lbw(hdev, addr, 
size, 0); 9553 9554 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET; 9555 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET; 9556 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0; 9557 9558 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9559 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val); 9560 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0); 9561 mon_sts_addr += DCORE_OFFSET; 9562 mon_cfg_addr += DCORE_OFFSET; 9563 } 9564 9565 offset = hdev->asic_prop.first_available_user_sob[0] * 4; 9566 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset; 9567 val = 0; 9568 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - 9569 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9570 9571 /* memset dcore0 sobs */ 9572 gaudi2_memset_device_lbw(hdev, addr, size, val); 9573 9574 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET; 9575 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0; 9576 9577 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9578 gaudi2_memset_device_lbw(hdev, addr, size, val); 9579 addr += DCORE_OFFSET; 9580 } 9581 9582 /* Flush all WREG to prevent race */ 9583 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9584 } 9585 9586 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev) 9587 { 9588 u32 reg_base, hw_queue_id; 9589 9590 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0; 9591 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9592 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9593 continue; 9594 9595 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9596 9597 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9598 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9599 } 9600 9601 /* Flush all WREG to prevent race */ 9602 RREG32(mmPDMA0_QM_ARB_CFG_0); 9603 } 9604 9605 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev) 9606 { 9607 u32 reg_base, hw_queue_id; 9608 9609 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3; 9610 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9611 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9612 continue; 9613 9614 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9615 9616 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9617 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9618 } 9619 9620 /* Flush all WREG to prevent race */ 9621 RREG32(mmPDMA0_QM_ARB_CFG_0); 9622 } 9623 9624 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid) 9625 { 9626 return 0; 9627 } 9628 9629 static void gaudi2_restore_phase_topology(struct hl_device *hdev) 9630 { 9631 } 9632 9633 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx, 9634 struct dup_block_ctx *cfg_ctx) 9635 { 9636 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off; 9637 u8 seq; 9638 int i; 9639 9640 for (i = 0 ; i < cfg_ctx->instances ; i++) { 9641 seq = block_idx * cfg_ctx->instances + i; 9642 9643 /* skip disabled instance */ 9644 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq))) 9645 continue; 9646 9647 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off, 9648 cfg_ctx->data); 9649 } 9650 } 9651 9652 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx, 9653 u64 mask) 9654 { 9655 int i; 9656 9657 cfg_ctx->enabled_mask = mask; 9658 9659 for (i = 0 ; i < cfg_ctx->blocks ; i++) 9660 gaudi2_init_block_instances(hdev, i, cfg_ctx); 9661 } 9662 9663 void 
void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}

static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	/* Fetch the ctx */
	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false,
				MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
unreserve_va:
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}
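/*
 * Illustrative walk-through of the chunking loop in gaudi2_debugfs_read_dma()
 * above (the 5MB size is chosen for the example only): a read of size = 5MB is
 * done in three KDMA jobs of 2MB, 2MB and 1MB. Each chunk lands in the single
 * 2MB bounce buffer and is then memcpy()'d into blob_addr at offset pos:
 *
 *	iteration   size_left (loop entry)   size_to_dma   memcpy offset (pos)
 *	    1               5MB                  2MB               0
 *	    2               3MB                  2MB              2MB
 *	    3               1MB                  1MB              4MB  (then break)
 */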
dev_err(hdev->dev, "Failed to create internal CB pool\n"); 9778 rc = -ENOMEM; 9779 goto free_internal_cb_pool; 9780 } 9781 9782 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr, 9783 HOST_SPACE_INTERNAL_CB_SZ, -1); 9784 if (rc) { 9785 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n"); 9786 rc = -EFAULT; 9787 goto destroy_internal_cb_pool; 9788 } 9789 9790 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 9791 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 9792 9793 if (!hdev->internal_cb_va_base) { 9794 rc = -ENOMEM; 9795 goto destroy_internal_cb_pool; 9796 } 9797 9798 mutex_lock(&hdev->mmu_lock); 9799 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 9800 HOST_SPACE_INTERNAL_CB_SZ); 9801 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 9802 mutex_unlock(&hdev->mmu_lock); 9803 9804 if (rc) 9805 goto unreserve_internal_cb_pool; 9806 9807 return 0; 9808 9809 unreserve_internal_cb_pool: 9810 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9811 destroy_internal_cb_pool: 9812 gen_pool_destroy(hdev->internal_cb_pool); 9813 free_internal_cb_pool: 9814 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9815 hdev->internal_cb_pool_dma_addr); 9816 9817 return rc; 9818 } 9819 9820 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx) 9821 { 9822 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9823 9824 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9825 return; 9826 9827 mutex_lock(&hdev->mmu_lock); 9828 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9829 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9830 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 9831 mutex_unlock(&hdev->mmu_lock); 9832 9833 gen_pool_destroy(hdev->internal_cb_pool); 9834 9835 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9836 hdev->internal_cb_pool_dma_addr); 9837 } 9838 9839 static void gaudi2_restore_user_registers(struct hl_device *hdev) 9840 { 9841 gaudi2_restore_user_sm_registers(hdev); 9842 gaudi2_restore_user_qm_registers(hdev); 9843 } 9844 9845 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9846 { 9847 struct hl_device *hdev = ctx->hdev; 9848 struct asic_fixed_properties *prop = &hdev->asic_prop; 9849 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9850 int rc; 9851 9852 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9853 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true); 9854 if (rc) 9855 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n", 9856 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 9857 9858 return rc; 9859 } 9860 9861 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9862 { 9863 struct hl_device *hdev = ctx->hdev; 9864 struct asic_fixed_properties *prop = &hdev->asic_prop; 9865 int rc; 9866 9867 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9868 prop->pmmu.page_size, true); 9869 if (rc) 9870 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n", 9871 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 9872 } 9873 9874 static int gaudi2_ctx_init(struct hl_ctx *ctx) 9875 { 9876 int rc; 9877 9878 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); 9879 if (rc) 9880 return 
static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}

static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}

static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}

static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear the user registers if the device has just been
	 * reset; in that case restore only the NIC QM registers.
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}

static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
	 * cyclic index. The SOB value is increased when each of the CS jobs is
	 * completed. When the SOB reaches the number of CS jobs, the monitor
	 * generates an MSI-X interrupt.
	 */

	sob_id = mon_id = index;
	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
			(1 << CQ_ENTRY_READY_SHIFT) | index;

	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
				cs->jobs_cnt);

	return 0;
}

static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}

static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
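/*
 * Worked example (editorial illustration, all values derived from the helpers
 * above): for sob_id = 5 and eb = 1, gaudi2_gen_signal_cb() emits a single
 * packet_msg_short whose fields decode as
 *
 *	value: SOB_SYNC_VAL = 1, SOB_MOD = 1	-> add 1 to the sync object
 *	ctl:   ADDR = 5 * 4 = 0x14 (byte offset of SOB 5), BASE = 1 (SOB base),
 *	       OPCODE = PACKET_MSG_SHORT, EB = 1, MB = 1
 *
 * i.e. one short packet that makes the QMAN increment SOB 5 when it is
 * processed. The exact bit positions live in the GAUDI2_PKT_* masks and are
 * not repeated here.
 */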
static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
		return 0;
	}

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
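/*
 * Overview (derived from the code below, added here for readability):
 * gaudi2_gen_wait_cb() appends five packets to the wait CB, in this order:
 *
 *	1. MSG_SHORT -> MON_PAY_ADDRL[mon_id]: low 32 bits of the fence address
 *	2. MSG_SHORT -> MON_PAY_ADDRH[mon_id]: high 32 bits of the fence address
 *	3. MSG_SHORT -> MON_PAY_DATA[mon_id]:  the payload (1) written on trigger
 *	4. MSG_SHORT -> MON_ARM[mon_id]:       SOB group/value/mask to wait on
 *	5. FENCE:                              stalls the stream on fence ID 2
 *	                                       until the monitor writes the payload
 *
 * The returned size is prop->size plus the bytes added for these packets.
 */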
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}

static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}

static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/*
 * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
 *			      address to a DMMU page-size (64MB) address before
 *			      mapping it in the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * For a device with 6 HBMs the scramble is:
 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM   Reserved VA      Scrambled VA for MMU mapping   Scrambled PA
 * Phys address                                                     in MMU last
 *                                                                  HOP
 * =============================================================================
 * PA1 0x3000000    VA1 0x9C000000   SVA1 = (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000    VA2 0x9F000000   SVA2 = (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}

static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}
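/*
 * Worked example of the scrambling above, assuming the full 6 functional HBMs
 * (so divisor = 6 * 8MB = 48MB, and the DIV shift of 26 is a multiply by 64MB):
 *
 *	gaudi2_mmu_scramble_addr(0x9C000000):
 *		div = 0x9C000000 / 48M = 52, mod = 0
 *		-> (52 << 26) | 0 = 0xD0000000	(matches SVA1 in the table above)
 *
 *	gaudi2_mmu_descramble_addr(0xD0000000):
 *		div = 0xD0000000 / 64M = 52, mod = 0
 *		-> 52 * 48M + 0 = 0x9C000000	(the original address)
 *
 * Bits outside GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK (the VA hint bits) are carried
 * through unchanged in both directions.
 */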
static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 base = 0, dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		goto out;
	}

	if (core_id < 8) {
		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
		dec_id = core_id % NUM_OF_DEC_PER_DCORE;

		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;
	} else {
		/* PCIe Shared Decoder */
		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}
out:
	return base;
}

static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
			*block_id = i;
			if (block_size)
				*block_size = gaudi2->mapped_blocks[i].size;
			return 0;
		}
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}

static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 offset_in_bar;
	u64 address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
				block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}

static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}

static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
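/*
 * The loop in gaudi2_ack_mmu_page_fault_or_access_error() below walks the 16
 * HMMU capability bits as
 *
 *	mmu_id = HW_CAP_DCORE0_DMMU0 << i;	for i = 0 .. (NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES - 1)
 *
 * which relies on the HW_CAP_DCOREx_DMMUy bits being a contiguous run starting
 * at HW_CAP_DCORE0_DMMU0, so that each shifted value matches exactly one case
 * of gaudi2_get_mmu_base() above.
 */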
static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}

static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}
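/*
 * Background for gaudi2_mmu_get_real_page_size() below (a summary, derived
 * from the checks and comments in that function): DRAM allocations use
 * prop->dram_page_size, which on Gaudi2 is not a power of two because it
 * scales with the number of functional HBMs, while the DMMU maps fixed
 * power-of-two pages. The helper therefore only verifies that the requested
 * mapping is a whole number of DRAM pages (and that a DRAM page is not larger
 * than a DMMU page), reports the DRAM page size as the "real" one, and leaves
 * bridging the two sizes to gaudi2_mmu_scramble_addr() when the PTEs are
 * written.
 */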
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
					u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * MMU page size is different from DRAM page size (more precisely, the DMMU page is
	 * greater than the DRAM page size).
	 * For this reason work with the DRAM page size and let the MMU scrambling routine handle
	 * this mismatch when calculating the address to place in the MMU page table.
	 * (The check above also makes sure that the dram_page_size is not greater than the
	 * MMU page size.)
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}
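/*
 * Usage note: the common habanalabs core does not call the static gaudi2_*
 * functions above directly; it dispatches through the ops table installed
 * here, e.g. hdev->asic_funcs->hw_init(hdev) or
 * hdev->asic_funcs->scramble_addr(hdev, addr), so the ASIC-specific behaviour
 * is selected once per device by gaudi2_set_asic_funcs().
 */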