// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and relies on that value (for array sizes etc.), we define another value for the
 * maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
	(!((dma_core_idle_ind_mask) &		\
	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because
 * it has a 2-entry FIFO, and hence it is not enabled for the PMMU (with
 * GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE = 19, the PMMU mask is GENMASK(17, 0) while the HMMU mask,
 * GENMASK(18, 0), enables all 19 cause bits).
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] =
GAUDI2_EVENT_HDMA0_QM, 181 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM, 182 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM, 183 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM, 184 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM, 185 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM, 186 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM, 187 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM, 188 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM, 189 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM, 190 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM, 191 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM, 192 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM, 193 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM, 194 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM, 195 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM, 196 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM, 197 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM, 198 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM, 199 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM, 200 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM, 201 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM, 202 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM, 203 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM, 204 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM, 205 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM, 206 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM, 207 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM, 208 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM, 209 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM, 210 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM, 211 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM, 212 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM, 213 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM, 214 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM, 215 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM, 216 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM, 217 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM, 218 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM, 219 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM, 220 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM, 221 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM, 222 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM, 223 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM, 224 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM, 225 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM, 226 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM, 227 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM, 228 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM, 229 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM, 230 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM, 231 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM, 232 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM, 233 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM, 234 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM, 235 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM, 236 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM, 237 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM, 238 
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM, 239 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM, 240 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM, 241 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM, 242 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM, 243 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM, 244 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM, 245 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM, 246 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM, 247 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM, 248 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM, 249 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM, 250 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM, 251 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM, 252 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM, 253 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM, 254 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM, 255 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM, 256 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM, 257 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM, 258 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM, 259 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM, 260 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM, 261 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM, 262 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM, 263 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM, 264 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM, 265 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM, 266 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM, 267 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM, 268 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM, 269 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM, 270 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM, 271 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM, 272 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM, 273 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM, 274 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM, 275 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM, 276 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM, 277 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM, 278 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM, 279 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM, 280 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM, 281 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM, 282 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM, 283 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM, 284 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM, 285 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM, 286 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM, 287 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM, 288 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM, 289 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM, 290 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM, 291 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM, 292 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM, 293 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM, 294 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM, 295 
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM, 296 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM, 297 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM, 298 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM, 299 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM, 300 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM, 301 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM, 302 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM, 303 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM, 304 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM, 305 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM, 306 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM, 307 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM, 308 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM, 309 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM, 310 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM, 311 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM, 312 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM, 313 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM, 314 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM, 315 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM, 316 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM, 317 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM, 318 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM, 319 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM, 320 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM, 321 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM, 322 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM, 323 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM, 324 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM, 325 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM, 326 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM, 327 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM, 328 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0, 329 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0, 330 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0, 331 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0, 332 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1, 333 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1, 334 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1, 335 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1, 336 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0, 337 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0, 338 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0, 339 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0, 340 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1, 341 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1, 342 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1, 343 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1, 344 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0, 345 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0, 346 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0, 347 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0, 348 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1, 349 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1, 350 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1, 351 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1, 352 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0, 353 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0, 354 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0, 355 
[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0, 356 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1, 357 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1, 358 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1, 359 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1, 360 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0, 361 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0, 362 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0, 363 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0, 364 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1, 365 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1, 366 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1, 367 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1, 368 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0, 369 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0, 370 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0, 371 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0, 372 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1, 373 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1, 374 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1, 375 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1, 376 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0, 377 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0, 378 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0, 379 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0, 380 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1, 381 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1, 382 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1, 383 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1, 384 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0, 385 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0, 386 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0, 387 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0, 388 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1, 389 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1, 390 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1, 391 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1, 392 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0, 393 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0, 394 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0, 395 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0, 396 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1, 397 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1, 398 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1, 399 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1, 400 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0, 401 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0, 402 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0, 403 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0, 404 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1, 405 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1, 406 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1, 407 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1, 408 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0, 409 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0, 410 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0, 411 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0, 412 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1, 413 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1, 414 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1, 415 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1, 416 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0, 417 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0, 418 [GAUDI2_QUEUE_ID_NIC_22_2] = 
		GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
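	/*
	 * Note (derived from the tables in this file): every queue manager
	 * exposes four physical queues, so each group of four consecutive
	 * GAUDI2_QUEUE_ID_* entries in this table, and in
	 * gaudi2_qman_async_event_id[] above, shares the same QM block base
	 * (respectively, the same QM async event ID).
	 */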
[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE, 735 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE, 736 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE, 737 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE, 738 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE, 739 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE, 740 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE, 741 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE, 742 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE, 743 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE, 744 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE, 745 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE, 746 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE, 747 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE, 748 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE, 749 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE, 750 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE, 751 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE, 752 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE, 753 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE, 754 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE, 755 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE, 756 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE, 757 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE, 758 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE, 759 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE, 760 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE, 761 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE, 762 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE, 763 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE, 764 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE, 765 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE, 766 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE, 767 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE, 768 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE, 769 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE, 770 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE, 771 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE, 772 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE, 773 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE, 774 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE, 775 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE, 776 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE, 777 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE, 778 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE, 779 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE, 780 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE, 781 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE, 782 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE, 783 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE, 784 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE, 785 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE, 786 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE, 787 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE, 788 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE, 789 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE, 790 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE, 791 
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE, 792 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE, 793 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE, 794 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE, 795 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE, 796 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE, 797 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE, 798 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE, 799 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE, 800 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE, 801 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE, 802 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE, 803 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE, 804 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE, 805 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE, 806 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE, 807 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE, 808 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE, 809 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE, 810 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE, 811 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE, 812 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE, 813 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE, 814 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE, 815 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE, 816 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE, 817 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE, 818 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE, 819 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE, 820 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE, 821 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE, 822 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE, 823 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE, 824 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE, 825 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE, 826 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE, 827 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE, 828 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE, 829 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE, 830 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE, 831 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE, 832 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE, 833 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE, 834 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE, 835 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE, 836 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE, 837 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE, 838 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE, 839 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE, 840 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE, 841 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE, 842 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE, 843 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE, 844 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE, 845 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE, 846 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE, 847 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE, 
848 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE, 849 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE, 850 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE, 851 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE, 852 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE, 853 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE, 854 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE, 855 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE, 856 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE, 857 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE, 858 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE, 859 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE, 860 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE, 861 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE, 862 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE, 863 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE, 864 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE, 865 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE, 866 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE, 867 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE, 868 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE, 869 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE, 870 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE, 871 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE, 872 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE, 873 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE, 874 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE, 875 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE, 876 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE, 877 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE, 878 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE, 879 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE, 880 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE, 881 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE, 882 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE, 883 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE, 884 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE, 885 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE, 886 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE, 887 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE, 888 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE, 889 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE, 890 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE, 891 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE, 892 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE, 893 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE, 894 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE, 895 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE, 896 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE, 897 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE, 898 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE, 899 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE, 900 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE, 901 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE, 902 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE, 903 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE, 904 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE, 905 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE, 906 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE, 907 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE, 908 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE, 909 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE, 910 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE, 911 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE, 912 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE, 913 [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE, 914 [GAUDI2_QUEUE_ID_NIC_11_0] 
= mmNIC5_QM1_BASE, 915 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE, 916 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE, 917 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE, 918 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE, 919 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE, 920 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE, 921 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE, 922 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE, 923 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE, 924 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE, 925 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE, 926 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE, 927 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE, 928 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE, 929 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE, 930 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE, 931 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE, 932 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE, 933 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE, 934 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE, 935 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE, 936 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE, 937 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE, 938 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE, 939 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE, 940 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE, 941 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE, 942 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE, 943 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE, 944 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE, 945 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE, 946 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE, 947 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE, 948 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE, 949 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE, 950 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE, 951 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE, 952 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE, 953 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE, 954 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE, 955 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE, 956 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE, 957 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE, 958 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE, 959 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE, 960 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE, 961 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE, 962 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE, 963 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE, 964 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE, 965 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE, 966 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE, 967 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE, 968 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE, 969 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE, 970 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE, 971 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE, 972 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE, 973 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE 974 }; 975 976 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = { 977 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE, 978 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE, 979 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE, 980 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE, 981 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE, 982 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE, 983 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE, 984 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE, 985 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE, 986 
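	/*
	 * Note that CPU_ID_SCHED_ARC4 and CPU_ID_SCHED_ARC5 above reside in the
	 * DCORE1/DCORE3 MME QM ARC AUX blocks; accordingly,
	 * gaudi2_queue_id_to_arc_id[] below maps the DCORE1 and DCORE3 MME
	 * queues to these scheduler ARCs rather than to CPU_ID_MME_QMAN_ARC0/1.
	 */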
[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 987 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 988 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 989 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 990 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 991 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 992 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 993 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 994 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 995 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 996 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 997 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 998 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 999 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1000 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1001 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1002 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1003 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1004 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1005 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1006 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1007 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1008 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1009 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1010 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1011 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1012 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1013 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1014 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1015 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1016 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1017 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1018 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1019 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1020 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1021 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1022 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1023 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1024 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1025 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1026 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1027 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1028 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1029 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1030 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1031 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1032 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1033 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1034 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1035 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1036 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1037 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1038 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1039 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1040 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1041 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE, 1042 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE, 1043 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1044 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE, 1045 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1046 }; 1047 1048 static const 
u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1049 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1050 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1051 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1052 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1053 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1054 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1055 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1056 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1057 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1058 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1059 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1060 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1061 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1062 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1063 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1064 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1065 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1066 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1067 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1068 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1069 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1070 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1071 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1072 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1073 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1074 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1075 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1076 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1077 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1078 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1079 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1080 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1081 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1082 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1083 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1084 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1085 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1086 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1087 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1088 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1089 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1090 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1091 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1092 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1093 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1094 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1095 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1096 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1097 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1098 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1099 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1100 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1101 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1102 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1103 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1104 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1105 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1106 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE, 1107 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1108 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE, 1109 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1110 [CPU_ID_NIC_QMAN_ARC16] = 
mmNIC8_QM_DCCM0_BASE, 1111 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1112 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1113 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1114 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1115 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1116 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1117 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1118 }; 1119 1120 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1121 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1122 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1123 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1124 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1125 }; 1126 1127 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1128 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1129 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1130 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1131 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1132 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1133 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1134 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1135 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1136 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1137 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1138 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1139 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1140 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1141 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1142 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1143 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1144 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1145 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1146 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1147 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1148 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1149 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1150 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1151 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1152 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1153 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1154 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1155 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1156 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1157 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1158 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1159 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1160 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1161 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1162 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1163 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1164 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1165 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1166 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1167 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1168 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5, 1169 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 1170 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1171 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5, 1172 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1173 
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1174 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1175 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1176 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1177 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1178 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1179 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1180 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1181 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1182 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1183 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1184 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1185 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1186 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1187 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1188 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1189 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1190 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1191 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1192 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1193 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1194 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1195 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1196 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1197 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1198 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1199 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1200 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1201 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1202 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1203 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1204 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1205 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1206 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1207 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1208 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1209 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1210 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1211 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1212 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1213 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1214 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1215 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1216 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1217 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1218 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1219 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1220 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1221 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1222 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1223 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1224 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1225 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1226 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12, 1227 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1228 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1229 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = 
CPU_ID_TPC_QMAN_ARC13, 1230 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1231 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1232 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1233 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1234 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1235 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1236 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1237 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1238 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1239 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1240 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1241 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1242 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1243 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1244 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1245 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1246 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1247 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1248 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1249 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1250 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1251 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1252 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1253 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1254 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1255 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1256 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1257 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1258 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1259 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1260 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1261 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1262 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1263 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1264 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1265 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1266 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1267 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1268 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1269 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1270 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1271 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1272 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1273 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1274 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1275 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1276 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23, 1282 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1283 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1284 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0, 1285 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1286 
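/*
 * Illustrative sketch, not compiled: for the NIC entries starting here, this
 * mapping is what lets gaudi2_init_arcs() below skip the ARC of a NIC port that
 * is masked out. The queue id is an example value and the BIT_ULL() test mirrors
 * the check done in gaudi2_init_arcs():
 *
 *	u32 arc_id = gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_NIC_3_0];
 *	bool port_enabled = hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
 *
 * Here arc_id is CPU_ID_NIC_QMAN_ARC3, i.e. bit 3 of nic_ports_mask is consulted.
 */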
[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1287 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1288 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1289 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1290 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1291 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1292 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1293 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1294 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1295 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1296 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1297 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1298 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1299 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1300 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1301 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1302 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1303 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1304 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1305 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1306 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1307 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1308 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1309 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1310 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1311 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1312 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1313 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1314 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1315 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1316 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1317 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1318 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1319 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1320 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1321 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1322 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1323 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1324 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1325 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1326 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1327 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1328 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1329 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1330 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1331 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1332 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1333 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1334 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1335 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1336 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1337 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1338 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1339 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1340 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1341 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1342 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1343 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1344 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1345 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1346 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15, 1347 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1348 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1349 [GAUDI2_QUEUE_ID_NIC_16_1] = 
CPU_ID_NIC_QMAN_ARC16, 1350 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1351 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1352 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1353 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1354 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1355 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1356 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1357 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1358 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1359 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1360 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1361 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1362 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1363 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1364 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1365 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1366 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1367 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1368 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1369 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1370 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1371 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1372 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1373 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1374 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1375 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1376 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1377 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1378 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1379 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1380 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1381 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1382 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1383 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1384 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1385 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1386 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1387 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1388 }; 1389 1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1391 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1392 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1393 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1394 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1395 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1396 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1397 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1398 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1399 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1400 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1401 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1402 }; 1403 1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1405 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1406 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1407 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1408 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1409 }; 1410 1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1412 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1413 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1414 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE, 1415 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1416 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE, 1417 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1418 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1419 [TPC_ID_DCORE1_TPC1] = 
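/*
 * Sketch only, not compiled: these per-TPC config bases are assumed to follow the
 * fixed DCORE/TPC stride that gaudi2_iterate_tpcs() below relies on, so e.g. for
 * this DCORE1 TPC1 entry the same base could be derived from the DCORE0 TPC0 one:
 *
 *	u32 offset = (DCORE_OFFSET * 1) + (DCORE_TPC_OFFSET * 1);
 *
 * giving mmDCORE0_TPC0_CFG_BASE + offset. The driver still indexes the table
 * directly; the stride relation is spelled out here only to explain the offset the
 * iterator computes per instance.
 */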
mmDCORE1_TPC1_CFG_BASE, 1420 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1421 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1422 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1423 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1424 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1425 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1426 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1427 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1428 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1429 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1430 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1431 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1432 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1433 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1434 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1435 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1436 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1437 }; 1438 1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1440 [ROTATOR_ID_0] = mmROT0_BASE, 1441 [ROTATOR_ID_1] = mmROT1_BASE 1442 }; 1443 1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1445 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1446 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1447 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1448 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1449 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1450 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1451 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1452 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1453 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1454 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1455 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1456 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0, 1457 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0, 1458 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0, 1459 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0, 1460 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0, 1461 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0, 1462 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0, 1463 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0, 1464 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0, 1465 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0, 1466 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0, 1467 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0, 1468 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0, 1469 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0, 1470 }; 1471 1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { 1473 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0, 1474 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, 1475 }; 1476 1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1478 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1479 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1480 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1481 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1482 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1483 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1484 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1485 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1486 }; 1487 1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1489 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1490 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1491 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1492 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal", 1493 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 
abnormal", 1494 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1495 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1496 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1497 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1498 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1499 }; 1500 1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = { 1502 RTR_ID_X_Y(2, 4), 1503 RTR_ID_X_Y(3, 4), 1504 RTR_ID_X_Y(4, 4), 1505 RTR_ID_X_Y(5, 4), 1506 RTR_ID_X_Y(6, 4), 1507 RTR_ID_X_Y(7, 4), 1508 RTR_ID_X_Y(8, 4), 1509 RTR_ID_X_Y(9, 4), 1510 RTR_ID_X_Y(10, 4), 1511 RTR_ID_X_Y(11, 4), 1512 RTR_ID_X_Y(12, 4), 1513 RTR_ID_X_Y(13, 4), 1514 RTR_ID_X_Y(14, 4), 1515 RTR_ID_X_Y(15, 4), 1516 RTR_ID_X_Y(16, 4), 1517 RTR_ID_X_Y(17, 4), 1518 RTR_ID_X_Y(2, 11), 1519 RTR_ID_X_Y(3, 11), 1520 RTR_ID_X_Y(4, 11), 1521 RTR_ID_X_Y(5, 11), 1522 RTR_ID_X_Y(6, 11), 1523 RTR_ID_X_Y(7, 11), 1524 RTR_ID_X_Y(8, 11), 1525 RTR_ID_X_Y(9, 11), 1526 RTR_ID_X_Y(0, 0),/* 24 no id */ 1527 RTR_ID_X_Y(0, 0),/* 25 no id */ 1528 RTR_ID_X_Y(0, 0),/* 26 no id */ 1529 RTR_ID_X_Y(0, 0),/* 27 no id */ 1530 RTR_ID_X_Y(14, 11), 1531 RTR_ID_X_Y(15, 11), 1532 RTR_ID_X_Y(16, 11), 1533 RTR_ID_X_Y(17, 11) 1534 }; 1535 1536 enum rtr_id { 1537 DCORE0_RTR0, 1538 DCORE0_RTR1, 1539 DCORE0_RTR2, 1540 DCORE0_RTR3, 1541 DCORE0_RTR4, 1542 DCORE0_RTR5, 1543 DCORE0_RTR6, 1544 DCORE0_RTR7, 1545 DCORE1_RTR0, 1546 DCORE1_RTR1, 1547 DCORE1_RTR2, 1548 DCORE1_RTR3, 1549 DCORE1_RTR4, 1550 DCORE1_RTR5, 1551 DCORE1_RTR6, 1552 DCORE1_RTR7, 1553 DCORE2_RTR0, 1554 DCORE2_RTR1, 1555 DCORE2_RTR2, 1556 DCORE2_RTR3, 1557 DCORE2_RTR4, 1558 DCORE2_RTR5, 1559 DCORE2_RTR6, 1560 DCORE2_RTR7, 1561 DCORE3_RTR0, 1562 DCORE3_RTR1, 1563 DCORE3_RTR2, 1564 DCORE3_RTR3, 1565 DCORE3_RTR4, 1566 DCORE3_RTR5, 1567 DCORE3_RTR6, 1568 DCORE3_RTR7, 1569 }; 1570 1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1572 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1573 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1574 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1575 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1576 DCORE0_RTR0 1577 }; 1578 1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { 1580 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1581 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1582 }; 1583 1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { 1585 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1586 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1587 }; 1588 1589 struct sft_info { 1590 u8 interface_id; 1591 u8 dcore_id; 1592 }; 1593 1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1595 {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3}, 1596 }; 1597 1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { 1599 DCORE0_RTR0, DCORE0_RTR0 1600 }; 1601 1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { 1603 DCORE2_RTR0, DCORE3_RTR7 1604 }; 1605 1606 struct mme_initiators_rtr_id { 1607 u32 wap0; 1608 u32 wap1; 1609 u32 write; 1610 u32 read; 1611 u32 sbte0; 1612 u32 sbte1; 1613 u32 sbte2; 1614 u32 sbte3; 1615 u32 sbte4; 1616 }; 1617 1618 enum mme_initiators { 1619 MME_WAP0 = 0, 1620 MME_WAP1, 1621 MME_WRITE, 1622 MME_READ, 1623 MME_SBTE0, 1624 MME_SBTE1, 1625 
MME_SBTE2, 1626 MME_SBTE3, 1627 MME_SBTE4, 1628 MME_INITIATORS_MAX 1629 }; 1630 1631 static const struct mme_initiators_rtr_id 1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1633 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1634 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1635 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1636 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1637 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1638 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1639 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1640 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1641 }; 1642 1643 enum razwi_event_sources { 1644 RAZWI_TPC, 1645 RAZWI_MME, 1646 RAZWI_EDMA, 1647 RAZWI_PDMA, 1648 RAZWI_NIC, 1649 RAZWI_DEC, 1650 RAZWI_ROT 1651 }; 1652 1653 struct hbm_mc_error_causes { 1654 u32 mask; 1655 char cause[50]; 1656 }; 1657 1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 1659 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 1660 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 1661 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 1662 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 1663 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 1664 }; 1665 1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 1667 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 1668 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 1669 [HBM_SEI_READ_ERR] = "SEI read data error", 1670 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 1671 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 1672 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 1673 [HBM_SEI_DFI] = "SEI DFI error", 1674 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 1675 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 1676 }; 1677 1678 struct mmu_spi_sei_cause { 1679 char cause[50]; 1680 int clear_bit; 1681 }; 1682 1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 1684 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 1685 {"page access", 1}, /* INTERRUPT_CLR[1] */ 1686 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 1687 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 1688 {"mmu rei0", -1}, /* no clear register bit */ 1689 {"mmu rei1", -1}, /* no clear register bit */ 1690 {"stlb rei0", -1}, /* no clear register bit */ 1691 {"stlb rei1", -1}, /* no clear register bit */ 1692 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 1693 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 1694 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 1695 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 1696 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1697 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1698 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1699 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1700 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 1701 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 1702 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 1703 }; 1704 1705 struct gaudi2_cache_invld_params { 1706 u64 start_va; 1707 u64 end_va; 1708 u32 inv_start_val; 1709 u32 flags; 1710 bool range_invalidation; 1711 }; 1712 1713 struct gaudi2_tpc_idle_data { 1714 struct engines_data *e; 1715 unsigned long *mask; 1716 bool *is_idle; 1717 const char *tpc_fmt; 1718 }; 1719 1720 struct gaudi2_tpc_mmu_data { 1721 u32 rw_asid; 1722 }; 1723 1724 static s64 
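/*
 * Illustrative sketch, not compiled: the clear_bit values in gaudi2_mmu_spi_sei[]
 * above encode which INTERRUPT_CLR bit acknowledges each cause, with -1 meaning
 * there is nothing to clear. A handler would therefore do something along the
 * lines below; mmu_base and MMU_INTERRUPT_CLR_OFFSET are placeholder names used
 * only for this sketch, not definitions taken from the register files:
 *
 *	int clr = gaudi2_mmu_spi_sei[cause_idx].clear_bit;
 *
 *	if (clr >= 0)
 *		WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, BIT(clr));
 */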
gaudi2_state_dump_specs_props[SP_MAX] = {0}; 1725 1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 1733 bool is_memset); 1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 1735 1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 1737 { 1738 1739 } 1740 1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 1742 { 1743 return sizeof(struct packet_msg_short); 1744 } 1745 1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 1747 { 1748 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 1749 } 1750 1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 1752 { 1753 struct asic_fixed_properties *prop = &hdev->asic_prop; 1754 int dcore, inst, tpc_seq; 1755 u32 offset; 1756 1757 /* init the return code */ 1758 ctx->rc = 0; 1759 1760 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 1761 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 1762 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 1763 1764 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 1765 continue; 1766 1767 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 1768 1769 ctx->fn(hdev, dcore, inst, offset, ctx); 1770 if (ctx->rc) { 1771 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 1772 dcore, inst); 1773 return; 1774 } 1775 } 1776 } 1777 1778 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 1779 return; 1780 1781 /* special check for PCI TPC (DCORE0_TPC6) */ 1782 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 1783 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 1784 if (ctx->rc) 1785 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 1786 } 1787 1788 static bool gaudi2_host_phys_addr_valid(u64 addr) 1789 { 1790 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 1791 return true; 1792 1793 return false; 1794 } 1795 1796 static int set_number_of_functional_hbms(struct hl_device *hdev) 1797 { 1798 struct asic_fixed_properties *prop = &hdev->asic_prop; 1799 u8 faulty_hbms = hweight64(hdev->dram_binning); 1800 1801 /* check if all HBMs should be used */ 1802 if (!faulty_hbms) { 1803 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 1804 prop->num_functional_hbms = GAUDI2_HBM_NUM; 1805 return 0; 1806 } 1807 1808 /* 1809 * check for error condition in which number of binning 1810 * candidates is higher than the maximum supported by the 1811 * driver (in which case binning mask shall be ignored and driver will 1812 * set the default) 1813 */ 1814 if (faulty_hbms > MAX_FAULTY_HBMS) { 1815 dev_err(hdev->dev, 1816 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 1817 MAX_FAULTY_HBMS, hdev->dram_binning); 1818 return -EINVAL; 1819 } 1820 1821 /* 1822 * by default, number of functional HBMs in Gaudi2 is always 1823 * GAUDI2_HBM_NUM - 1. 
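 * With HBM binning limited to a single faulty HBM, the rest of the code only ever
 * expects 5 or 6 functional HBMs (see the dram_size sanity check in
 * gaudi2_cpucp_info_get() below). The resulting count also sets the HBM page size
 * chosen by gaudi2_set_dram_properties(): basic page = num_functional_hbms * SZ_8M,
 * i.e. 40MB or 48MB, scaled x16 to compensate for the TLB size bug.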
 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
	return 0;
}

static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 basic_hbm_page_size;
	int rc;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return -EINVAL;

	/*
	 * Due to a HW bug, the TLB is x16 smaller than expected. As a workaround we
	 * use an x16 bigger page size, so that the entire HBM mapping fits in the TLB.
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;

	/* Since the DRAM page size differs from the DMMU page size, we allocate DRAM
	 * memory in units of dram_page_size and map this memory in units of the DMMU
	 * page size. We overcome this size mismatch using a scrambling routine which
	 * takes a DRAM page and converts it to a DMMU page.
	 * We therefore:
	 * 1. partition the virtual address space into whole DRAM-page-sized pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M, as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3.
determine the and address accordingly 1870 * end_addr = start_addr + m * 48M 1871 * 1872 * the DRAM address MSBs (63:48) are not part of the roundup calculation 1873 */ 1874 prop->dmmu.start_addr = prop->dram_base_address + 1875 (prop->dram_page_size * 1876 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size)); 1877 1878 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size * 1879 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size); 1880 1881 return 0; 1882 } 1883 1884 static int gaudi2_set_fixed_properties(struct hl_device *hdev) 1885 { 1886 struct asic_fixed_properties *prop = &hdev->asic_prop; 1887 struct hw_queue_properties *q_props; 1888 u32 num_sync_stream_queues = 0; 1889 int i; 1890 1891 prop->max_queues = GAUDI2_QUEUE_ID_SIZE; 1892 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties), 1893 GFP_KERNEL); 1894 1895 if (!prop->hw_queues_props) 1896 return -ENOMEM; 1897 1898 q_props = prop->hw_queues_props; 1899 1900 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) { 1901 q_props[i].type = QUEUE_TYPE_HW; 1902 q_props[i].driver_only = 0; 1903 1904 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) { 1905 q_props[i].supports_sync_stream = 0; 1906 } else { 1907 q_props[i].supports_sync_stream = 1; 1908 num_sync_stream_queues++; 1909 } 1910 1911 q_props[i].cb_alloc_flags = CB_ALLOC_USER; 1912 } 1913 1914 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU; 1915 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1; 1916 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL; 1917 1918 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE; 1919 prop->cfg_base_address = CFG_BASE; 1920 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0; 1921 prop->host_base_address = HOST_PHYS_BASE_0; 1922 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0; 1923 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS; 1924 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER; 1925 prop->user_dec_intr_count = NUMBER_OF_DEC; 1926 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1; 1927 prop->completion_mode = HL_COMPLETION_MODE_CS; 1928 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER; 1929 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER; 1930 1931 prop->sram_base_address = SRAM_BASE_ADDR; 1932 prop->sram_size = SRAM_SIZE; 1933 prop->sram_end_address = prop->sram_base_address + prop->sram_size; 1934 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET; 1935 1936 prop->hints_range_reservation = true; 1937 1938 if (hdev->pldm) 1939 prop->mmu_pgt_size = 0x800000; /* 8MB */ 1940 else 1941 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE; 1942 1943 prop->mmu_pte_size = HL_PTE_SIZE; 1944 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; 1945 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 1946 1947 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT; 1948 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT; 1949 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT; 1950 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT; 1951 prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT; 1952 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK; 1953 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK; 1954 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK; 1955 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK; 1956 prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK; 1957 prop->dmmu.page_size = PAGE_SIZE_1GB; 1958 prop->dmmu.num_hops = MMU_ARCH_6_HOPS; 1959 prop->dmmu.last_mask = 
LAST_MASK; 1960 prop->dmmu.host_resident = 1; 1961 /* TODO: will be duplicated until implementing per-MMU props */ 1962 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 1963 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1964 1965 /* 1966 * this is done in order to be able to validate FW descriptor (i.e. validating that 1967 * the addresses and allocated space for FW image does not cross memory bounds). 1968 * for this reason we set the DRAM size to the minimum possible and later it will 1969 * be modified according to what reported in the cpucp info packet 1970 */ 1971 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 1972 1973 hdev->pmmu_huge_range = true; 1974 prop->pmmu.host_resident = 1; 1975 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 1976 prop->pmmu.last_mask = LAST_MASK; 1977 /* TODO: will be duplicated until implementing per-MMU props */ 1978 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 1979 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1980 1981 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 1982 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 1983 prop->hints_host_hpage_reserved_va_range.start_addr = 1984 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 1985 prop->hints_host_hpage_reserved_va_range.end_addr = 1986 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 1987 1988 if (PAGE_SIZE == SZ_64K) { 1989 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 1990 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 1991 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 1992 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 1993 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 1994 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 1995 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 1996 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 1997 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 1998 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 1999 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2000 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2001 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2002 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2003 prop->pmmu.page_size = PAGE_SIZE_64KB; 2004 2005 /* shifts and masks are the same in PMMU and HPMMU */ 2006 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2007 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2008 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2009 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2010 } else { 2011 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2012 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2013 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2014 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2015 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2016 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2017 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2018 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2019 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2020 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2021 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2022 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2023 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2024 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2025 prop->pmmu.page_size = PAGE_SIZE_4KB; 2026 2027 /* shifts and masks are the same in PMMU and HPMMU */ 2028 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2029 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2030 
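/*
 * Illustrative sketch, not compiled: the hop_shifts/hop_masks filled in above are
 * consumed by the common hl MMU code, which derives the PTE index of a given hop
 * roughly as follows (virt_addr and the hop number are example inputs; the actual
 * walk lives in the generic MMU code, not in this file):
 *
 *	u64 hop3_idx = (virt_addr & prop->pmmu.hop_masks[MMU_HOP3]) >>
 *			prop->pmmu.hop_shifts[MMU_HOP3];
 */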
prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2031 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2032 } 2033 2034 prop->num_engine_cores = CPU_ID_MAX; 2035 prop->cfg_size = CFG_SIZE; 2036 prop->max_asid = MAX_ASID; 2037 prop->num_of_events = GAUDI2_EVENT_SIZE; 2038 2039 prop->dc_power_default = DC_POWER_DEFAULT; 2040 2041 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2042 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2043 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2044 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2045 2046 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2047 2048 prop->mme_master_slave_mode = 1; 2049 2050 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2051 (num_sync_stream_queues * HL_RSVD_SOBS); 2052 2053 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2054 (num_sync_stream_queues * HL_RSVD_MONS); 2055 2056 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2057 2058 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2059 2060 prop->fw_cpu_boot_dev_sts0_valid = false; 2061 prop->fw_cpu_boot_dev_sts1_valid = false; 2062 prop->hard_reset_done_by_fw = false; 2063 prop->gic_interrupts_enable = true; 2064 2065 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2066 2067 prop->max_dec = NUMBER_OF_DEC; 2068 2069 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2070 2071 prop->dma_mask = 64; 2072 2073 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; 2074 2075 return 0; 2076 } 2077 2078 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2079 { 2080 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2081 bool is_wc[3] = {false, false, true}; 2082 int rc; 2083 2084 rc = hl_pci_bars_map(hdev, name, is_wc); 2085 if (rc) 2086 return rc; 2087 2088 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2089 2090 return 0; 2091 } 2092 2093 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2094 { 2095 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2096 struct hl_inbound_pci_region pci_region; 2097 u64 old_addr = addr; 2098 int rc; 2099 2100 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2101 return old_addr; 2102 2103 if (hdev->asic_prop.iatu_done_by_fw) 2104 return U64_MAX; 2105 2106 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2107 pci_region.mode = PCI_BAR_MATCH_MODE; 2108 pci_region.bar = DRAM_BAR_ID; 2109 pci_region.addr = addr; 2110 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2111 if (rc) 2112 return U64_MAX; 2113 2114 if (gaudi2) { 2115 old_addr = gaudi2->dram_bar_cur_addr; 2116 gaudi2->dram_bar_cur_addr = addr; 2117 } 2118 2119 return old_addr; 2120 } 2121 2122 static int gaudi2_init_iatu(struct hl_device *hdev) 2123 { 2124 struct hl_inbound_pci_region inbound_region; 2125 struct hl_outbound_pci_region outbound_region; 2126 u32 bar_addr_low, bar_addr_high; 2127 int rc; 2128 2129 if (hdev->asic_prop.iatu_done_by_fw) 2130 return 0; 2131 2132 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2133 * We must map this region in BAR match mode in order to 2134 * fetch BAR physical base address 2135 */ 2136 inbound_region.mode = PCI_BAR_MATCH_MODE; 2137 inbound_region.bar = SRAM_CFG_BAR_ID; 2138 /* Base address must be aligned to Bar size which is 256 MB */ 2139 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 2140 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2141 if (rc) 2142 return rc; 2143 2144 /* Fetch physical BAR address */ 2145 
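/*
 * Note on the reads below (a sketch of the address arithmetic, assuming the rmmio
 * mapping set up in gaudi2_pci_bars_map()): the temporary region 0 is BAR-matched
 * at STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF because of the 256MB alignment
 * requirement, i.e. STM_FLASH_ALIGNED_OFF below the base that rmmio assumes, so
 * STM_FLASH_ALIGNED_OFF is added back to the register offsets. The access then
 * resolves to:
 *
 *	(STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF) +
 *		(CFG_BASE - STM_FLASH_BASE_ADDR) + reg + STM_FLASH_ALIGNED_OFF
 *		= CFG_BASE + reg
 *
 * which is the real DBI BAR register in the device CFG space.
 */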
bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2146 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2147 2148 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2149 2150 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2151 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2152 inbound_region.bar = SRAM_CFG_BAR_ID; 2153 inbound_region.offset_in_bar = 0; 2154 inbound_region.addr = STM_FLASH_BASE_ADDR; 2155 inbound_region.size = CFG_REGION_SIZE; 2156 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2157 if (rc) 2158 return rc; 2159 2160 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2161 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2162 inbound_region.bar = SRAM_CFG_BAR_ID; 2163 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2164 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2165 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2166 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2167 if (rc) 2168 return rc; 2169 2170 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2171 inbound_region.mode = PCI_BAR_MATCH_MODE; 2172 inbound_region.bar = DRAM_BAR_ID; 2173 inbound_region.addr = DRAM_PHYS_BASE; 2174 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2175 if (rc) 2176 return rc; 2177 2178 /* Outbound Region 0 - Point to Host */ 2179 outbound_region.addr = HOST_PHYS_BASE_0; 2180 outbound_region.size = HOST_PHYS_SIZE_0; 2181 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2182 2183 return rc; 2184 } 2185 2186 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2187 { 2188 return RREG32(mmHW_STATE); 2189 } 2190 2191 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2192 { 2193 struct asic_fixed_properties *prop = &hdev->asic_prop; 2194 2195 /* 2196 * check for error condition in which number of binning candidates 2197 * is higher than the maximum supported by the driver 2198 */ 2199 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2200 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2201 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2202 hdev->tpc_binning); 2203 return -EINVAL; 2204 } 2205 2206 prop->tpc_binning_mask = hdev->tpc_binning; 2207 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2208 2209 return 0; 2210 } 2211 2212 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2213 { 2214 struct asic_fixed_properties *prop = &hdev->asic_prop; 2215 struct hw_queue_properties *q_props = prop->hw_queues_props; 2216 u64 tpc_binning_mask; 2217 u8 subst_idx = 0; 2218 int i, rc; 2219 2220 rc = gaudi2_tpc_binning_init_prop(hdev); 2221 if (rc) 2222 return rc; 2223 2224 tpc_binning_mask = prop->tpc_binning_mask; 2225 2226 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2227 u8 subst_seq, binned, qid_base; 2228 2229 if (tpc_binning_mask == 0) 2230 break; 2231 2232 if (subst_idx == 0) { 2233 subst_seq = TPC_ID_DCORE0_TPC6; 2234 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2235 } else { 2236 subst_seq = TPC_ID_DCORE3_TPC5; 2237 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2238 } 2239 2240 2241 /* clear bit from mask */ 2242 binned = __ffs(tpc_binning_mask); 2243 /* 2244 * Coverity complains about possible out-of-bound access in 2245 * clear_bit 2246 */ 2247 if (binned >= TPC_ID_SIZE) { 2248 dev_err(hdev->dev, 2249 "Invalid binned TPC (binning mask: %llx)\n", 2250 tpc_binning_mask); 2251 return -EINVAL; 2252 } 2253 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2254 2255 /* also clear 
replacing TPC bit from enabled mask */ 2256 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask); 2257 2258 /* bin substite TPC's Qs */ 2259 q_props[qid_base].binned = 1; 2260 q_props[qid_base + 1].binned = 1; 2261 q_props[qid_base + 2].binned = 1; 2262 q_props[qid_base + 3].binned = 1; 2263 2264 subst_idx++; 2265 } 2266 2267 return 0; 2268 } 2269 2270 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev) 2271 { 2272 struct asic_fixed_properties *prop = &hdev->asic_prop; 2273 u8 num_faulty; 2274 2275 num_faulty = hweight32(hdev->decoder_binning); 2276 2277 /* 2278 * check for error condition in which number of binning candidates 2279 * is higher than the maximum supported by the driver 2280 */ 2281 if (num_faulty > MAX_FAULTY_DECODERS) { 2282 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n", 2283 hdev->decoder_binning); 2284 return -EINVAL; 2285 } 2286 2287 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK); 2288 2289 if (prop->decoder_binning_mask) 2290 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1)); 2291 else 2292 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK; 2293 2294 return 0; 2295 } 2296 2297 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev) 2298 { 2299 struct asic_fixed_properties *prop = &hdev->asic_prop; 2300 2301 /* check if we should override default binning */ 2302 if (!hdev->dram_binning) { 2303 prop->dram_binning_mask = 0; 2304 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK; 2305 return; 2306 } 2307 2308 /* set DRAM binning constraints */ 2309 prop->faulty_dram_cluster_map |= hdev->dram_binning; 2310 prop->dram_binning_mask = hdev->dram_binning; 2311 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5); 2312 } 2313 2314 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev) 2315 { 2316 struct asic_fixed_properties *prop = &hdev->asic_prop; 2317 struct hw_queue_properties *q_props; 2318 u8 seq, num_faulty; 2319 2320 num_faulty = hweight32(hdev->edma_binning); 2321 2322 /* 2323 * check for error condition in which number of binning candidates 2324 * is higher than the maximum supported by the driver 2325 */ 2326 if (num_faulty > MAX_FAULTY_EDMAS) { 2327 dev_err(hdev->dev, 2328 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n", 2329 hdev->edma_binning); 2330 return -EINVAL; 2331 } 2332 2333 if (!hdev->edma_binning) { 2334 prop->edma_binning_mask = 0; 2335 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK; 2336 return 0; 2337 } 2338 2339 seq = __ffs((unsigned long)hdev->edma_binning); 2340 2341 /* set binning constraints */ 2342 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]); 2343 prop->edma_binning_mask = hdev->edma_binning; 2344 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1); 2345 2346 /* bin substitute EDMA's queue */ 2347 q_props = prop->hw_queues_props; 2348 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1; 2349 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1; 2350 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1; 2351 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1; 2352 2353 return 0; 2354 } 2355 2356 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask) 2357 { 2358 struct asic_fixed_properties *prop = &hdev->asic_prop; 2359 u8 num_faulty, seq; 2360 2361 /* check if we should override default binning */ 2362 if (!xbar_edge_iso_mask) { 2363 
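/*
 * Informational sketch of the shared binning pattern (mirrors the DRAM/EDMA
 * helpers above, shown here only as a summary): every faulty component is
 * translated into an HBM cluster bit in faulty_dram_cluster_map, and
 * gaudi2_set_cluster_binning_masks() below then narrows the HIF/HMMU mask for
 * that cluster. E.g. for a single faulty EDMA:
 *
 *	seq = __ffs((unsigned long)hdev->edma_binning);
 *	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
 *	cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
 *	prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
 */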
prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK; 2364 return 0; 2365 } 2366 2367 /* 2368 * note that it can be set to value other than 0 only after cpucp packet (i.e. 2369 * only the FW can set a redundancy value). for user it'll always be 0. 2370 */ 2371 num_faulty = hweight32(xbar_edge_iso_mask); 2372 2373 /* 2374 * check for error condition in which number of binning candidates 2375 * is higher than the maximum supported by the driver 2376 */ 2377 if (num_faulty > MAX_FAULTY_XBARS) { 2378 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n", 2379 MAX_FAULTY_XBARS); 2380 return -EINVAL; 2381 } 2382 2383 seq = __ffs((unsigned long)xbar_edge_iso_mask); 2384 2385 /* set binning constraints */ 2386 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]); 2387 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK; 2388 2389 return 0; 2390 } 2391 2392 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask) 2393 { 2394 int rc; 2395 2396 /* 2397 * mark all clusters as good, each component will "fail" cluster 2398 * based on eFuse/user values. 2399 * If more than single cluster is faulty- the chip is unusable 2400 */ 2401 hdev->asic_prop.faulty_dram_cluster_map = 0; 2402 2403 gaudi2_set_dram_binning_masks(hdev); 2404 2405 rc = gaudi2_set_edma_binning_masks(hdev); 2406 if (rc) 2407 return rc; 2408 2409 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask); 2410 if (rc) 2411 return rc; 2412 2413 2414 /* always initially set to full mask */ 2415 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK; 2416 2417 return 0; 2418 } 2419 2420 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev) 2421 { 2422 struct asic_fixed_properties *prop = &hdev->asic_prop; 2423 int rc; 2424 2425 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask); 2426 if (rc) 2427 return rc; 2428 2429 /* if we have DRAM binning reported by FW we should perform cluster config */ 2430 if (prop->faulty_dram_cluster_map) { 2431 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map); 2432 2433 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq]; 2434 } 2435 2436 return 0; 2437 } 2438 2439 static int gaudi2_set_binning_masks(struct hl_device *hdev) 2440 { 2441 int rc; 2442 2443 rc = gaudi2_set_cluster_binning_masks(hdev); 2444 if (rc) 2445 return rc; 2446 2447 rc = gaudi2_set_tpc_binning_masks(hdev); 2448 if (rc) 2449 return rc; 2450 2451 rc = gaudi2_set_dec_binning_masks(hdev); 2452 if (rc) 2453 return rc; 2454 2455 return 0; 2456 } 2457 2458 static int gaudi2_cpucp_info_get(struct hl_device *hdev) 2459 { 2460 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2461 struct asic_fixed_properties *prop = &hdev->asic_prop; 2462 long max_power; 2463 u64 dram_size; 2464 int rc; 2465 2466 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2467 return 0; 2468 2469 /* No point of asking this information again when not doing hard reset, as the device 2470 * CPU hasn't been reset 2471 */ 2472 if (hdev->reset_info.in_compute_reset) 2473 return 0; 2474 2475 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 2476 mmCPU_BOOT_ERR1); 2477 if (rc) 2478 return rc; 2479 2480 dram_size = le64_to_cpu(prop->cpucp_info.dram_size); 2481 if (dram_size) { 2482 /* we can have wither 5 or 6 HBMs. 
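 * (i.e. the reported dram_size must be either 5 * SZ_16G or 6 * SZ_16G, which is
 * exactly what the (GAUDI2_HBM_NUM - 1) / GAUDI2_HBM_NUM comparison below checks);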
other values are invalid */ 2483 2484 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2485 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2486 dev_err(hdev->dev, 2487 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n", 2488 dram_size, prop->dram_size); 2489 dram_size = prop->dram_size; 2490 } 2491 2492 prop->dram_size = dram_size; 2493 prop->dram_end_address = prop->dram_base_address + dram_size; 2494 } 2495 2496 if (!strlen(prop->cpucp_info.card_name)) 2497 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2498 2499 /* Overwrite binning masks with the actual binning values from F/W */ 2500 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2501 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2502 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2503 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2504 2505 /* 2506 * at this point the DRAM parameters need to be updated according to data obtained 2507 * from the FW 2508 */ 2509 rc = hdev->asic_funcs->set_dram_properties(hdev); 2510 if (rc) 2511 return rc; 2512 2513 rc = hdev->asic_funcs->set_binning_masks(hdev); 2514 if (rc) 2515 return rc; 2516 2517 max_power = hl_fw_get_max_power(hdev); 2518 if (max_power < 0) 2519 return max_power; 2520 2521 prop->max_power_default = (u64) max_power; 2522 2523 return 0; 2524 } 2525 2526 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2527 { 2528 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2529 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2530 int rc; 2531 2532 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2533 return 0; 2534 2535 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2536 if (rc) 2537 return rc; 2538 2539 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2540 2541 return 0; 2542 } 2543 2544 static int gaudi2_early_init(struct hl_device *hdev) 2545 { 2546 struct asic_fixed_properties *prop = &hdev->asic_prop; 2547 struct pci_dev *pdev = hdev->pdev; 2548 resource_size_t pci_bar_size; 2549 int rc; 2550 2551 rc = gaudi2_set_fixed_properties(hdev); 2552 if (rc) 2553 return rc; 2554 2555 /* Check BAR sizes */ 2556 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2557 2558 if (pci_bar_size != CFG_BAR_SIZE) { 2559 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2560 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2561 rc = -ENODEV; 2562 goto free_queue_props; 2563 } 2564 2565 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2566 if (pci_bar_size != MSIX_BAR_SIZE) { 2567 dev_err(hdev->dev, "Not " HL_NAME "? 
BAR %d size %pa, expecting %llu\n", 2568 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2569 rc = -ENODEV; 2570 goto free_queue_props; 2571 } 2572 2573 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2574 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2575 2576 /* 2577 * Only in pldm driver config iATU 2578 */ 2579 if (hdev->pldm) 2580 hdev->asic_prop.iatu_done_by_fw = false; 2581 else 2582 hdev->asic_prop.iatu_done_by_fw = true; 2583 2584 rc = hl_pci_init(hdev); 2585 if (rc) 2586 goto free_queue_props; 2587 2588 /* Before continuing in the initialization, we need to read the preboot 2589 * version to determine whether we run with a security-enabled firmware 2590 */ 2591 rc = hl_fw_read_preboot_status(hdev); 2592 if (rc) { 2593 if (hdev->reset_on_preboot_fail) 2594 hdev->asic_funcs->hw_fini(hdev, true, false); 2595 goto pci_fini; 2596 } 2597 2598 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2599 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2600 hdev->asic_funcs->hw_fini(hdev, true, false); 2601 } 2602 2603 return 0; 2604 2605 pci_fini: 2606 hl_pci_fini(hdev); 2607 free_queue_props: 2608 kfree(hdev->asic_prop.hw_queues_props); 2609 return rc; 2610 } 2611 2612 static int gaudi2_early_fini(struct hl_device *hdev) 2613 { 2614 kfree(hdev->asic_prop.hw_queues_props); 2615 hl_pci_fini(hdev); 2616 2617 return 0; 2618 } 2619 2620 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 2621 { 2622 switch (arc_id) { 2623 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 2624 return true; 2625 default: 2626 return false; 2627 } 2628 } 2629 2630 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 2631 { 2632 switch (arc_id) { 2633 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 2634 return true; 2635 default: 2636 return false; 2637 } 2638 } 2639 2640 static void gaudi2_init_arcs(struct hl_device *hdev) 2641 { 2642 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2643 u64 arc_id; 2644 u32 i; 2645 2646 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 2647 if (gaudi2_is_arc_enabled(hdev, i)) 2648 continue; 2649 2650 gaudi2_set_arc_id_cap(hdev, i); 2651 } 2652 2653 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 2654 if (!gaudi2_is_queue_enabled(hdev, i)) 2655 continue; 2656 2657 arc_id = gaudi2_queue_id_to_arc_id[i]; 2658 if (gaudi2_is_arc_enabled(hdev, arc_id)) 2659 continue; 2660 2661 if (gaudi2_is_arc_nic_owned(arc_id) && 2662 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 2663 continue; 2664 2665 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 2666 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 2667 continue; 2668 2669 gaudi2_set_arc_id_cap(hdev, arc_id); 2670 } 2671 } 2672 2673 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 2674 { 2675 u32 reg_base, reg_val; 2676 int rc; 2677 2678 switch (cpu_id) { 2679 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 2680 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 2681 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2682 ARC_DCCM_BLOCK_SIZE * 2, true); 2683 if (rc) 2684 return rc; 2685 break; 2686 case CPU_ID_SCHED_ARC4: 2687 case CPU_ID_SCHED_ARC5: 2688 case CPU_ID_MME_QMAN_ARC0: 2689 case CPU_ID_MME_QMAN_ARC1: 2690 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 2691 2692 /* Scrub lower DCCM block */ 2693 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2694 ARC_DCCM_BLOCK_SIZE, true); 2695 if (rc) 2696 return rc; 2697 2698 /* Switch to upper DCCM block */ 2699 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 2700 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2701 2702 /* Scrub upper DCCM block */ 2703 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2704 ARC_DCCM_BLOCK_SIZE, true); 2705 if (rc) 2706 return rc; 2707 2708 /* Switch to lower DCCM block */ 2709 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 2710 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2711 break; 2712 default: 2713 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2714 ARC_DCCM_BLOCK_SIZE, true); 2715 if (rc) 2716 return rc; 2717 } 2718 2719 return 0; 2720 } 2721 2722 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 2723 { 2724 u16 arc_id; 2725 2726 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 2727 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 2728 continue; 2729 2730 gaudi2_scrub_arc_dccm(hdev, arc_id); 2731 } 2732 } 2733 2734 static int gaudi2_late_init(struct hl_device *hdev) 2735 { 2736 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2737 int rc; 2738 2739 hdev->asic_prop.supports_advanced_cpucp_rc = true; 2740 2741 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 2742 gaudi2->virt_msix_db_dma_addr); 2743 if (rc) { 2744 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 2745 return rc; 2746 } 2747 2748 rc = gaudi2_fetch_psoc_frequency(hdev); 2749 if (rc) { 2750 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 2751 goto disable_pci_access; 2752 } 2753 2754 gaudi2_init_arcs(hdev); 2755 gaudi2_scrub_arcs_dccm(hdev); 2756 gaudi2_init_security(hdev); 2757 2758 return 0; 2759 2760 disable_pci_access: 2761 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 2762 2763 return rc; 2764 } 2765 2766 static void gaudi2_late_fini(struct hl_device *hdev) 2767 { 2768 hl_hwmon_release_resources(hdev); 2769 } 2770 2771 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 2772 { 2773 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2774 2775 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2776 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2777 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2778 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2779 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2780 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2781 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2782 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2783 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2784 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2785 } 2786 2787 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 2788 { 2789 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2790 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2791 u32 block_size, umr_start_idx, num_umr_blocks; 2792 int i; 2793 2794 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 2795 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 2796 block_size = ARC_DCCM_BLOCK_SIZE * 2; 2797 else 2798 block_size = ARC_DCCM_BLOCK_SIZE; 2799 2800 blocks[i].address = gaudi2_arc_dccm_bases[i]; 2801 blocks[i].size = block_size; 2802 } 2803 2804 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 2805 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 2806 2807 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 2808 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 2809 2810 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 2811 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 2812 2813 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 2814 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 2815 2816 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 2817 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 2818 2819 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 2820 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 2821 2822 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 2823 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 2824 2825 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 2826 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 2827 2828 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 2829 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 2830 for (i = 0 ; i < num_umr_blocks ; i++) { 2831 u8 nic_id, umr_block_id; 2832 2833 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 2834 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 2835 2836 blocks[umr_start_idx + i].address = 2837 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 2838 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 2839 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 2840 umr_block_id * NIC_UMR_OFFSET; 2841 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 2842 } 2843 2844 /* Expose decoder HW configuration block to user */ 2845 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 2846 2847 for (i = 1; i < NUM_OF_DCORES; ++i) { 2848 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 2849 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 2850 2851 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 2852 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 2853 2854 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 2855 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 2856 } 2857 } 2858 2859 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 2860 { 2861 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 2862 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 2863 int i, j, rc = 0; 2864 2865 /* The device ARC works with 32-bits addresses, and because there is a single HW register 2866 * that holds the extension bits (49..28), these bits must be identical in all the allocated 2867 * range. 
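 *
 * Illustrative note (not from the H/W spec): if a candidate allocation happens to straddle a
 * boundary where bits 49..28 change, GAUDI2_ARC_PCI_MSB_ADDR() of its first and last byte will
 * differ. The retry loop below therefore keeps such a failed attempt allocated (so the next
 * attempt lands elsewhere) and tries again, up to GAUDI2_ALLOC_CPU_MEM_RETRY_CNT times, until an
 * allocation whose start and end share the same extension bits is found; the failed attempts are
 * freed only at the end.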
2868 */ 2869 2870 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 2871 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 2872 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO); 2873 if (!virt_addr_arr[i]) { 2874 rc = -ENOMEM; 2875 goto free_dma_mem_arr; 2876 } 2877 2878 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 2879 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr)) 2880 break; 2881 } 2882 2883 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) { 2884 dev_err(hdev->dev, 2885 "MSB of ARC accessible DMA memory is not identical across the allocated range\n"); 2886 rc = -EFAULT; 2887 goto free_dma_mem_arr; 2888 } 2889 2890 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 2891 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 2892 2893 free_dma_mem_arr: 2894 for (j = 0 ; j < i ; j++) 2895 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 2896 dma_addr_arr[j]); 2897 2898 return rc; 2899 } 2900 2901 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev) 2902 { 2903 struct asic_fixed_properties *prop = &hdev->asic_prop; 2904 struct pci_mem_region *region; 2905 2906 /* CFG */ 2907 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 2908 region->region_base = CFG_BASE; 2909 region->region_size = CFG_SIZE; 2910 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR; 2911 region->bar_size = CFG_BAR_SIZE; 2912 region->bar_id = SRAM_CFG_BAR_ID; 2913 region->used = 1; 2914 2915 /* SRAM */ 2916 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 2917 region->region_base = SRAM_BASE_ADDR; 2918 region->region_size = SRAM_SIZE; 2919 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE; 2920 region->bar_size = CFG_BAR_SIZE; 2921 region->bar_id = SRAM_CFG_BAR_ID; 2922 region->used = 1; 2923 2924 /* DRAM */ 2925 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 2926 region->region_base = DRAM_PHYS_BASE; 2927 region->region_size = hdev->asic_prop.dram_size; 2928 region->offset_in_bar = 0; 2929 region->bar_size = prop->dram_pci_bar_size; 2930 region->bar_id = DRAM_BAR_ID; 2931 region->used = 1; 2932 } 2933 2934 static void gaudi2_user_interrupt_setup(struct hl_device *hdev) 2935 { 2936 struct asic_fixed_properties *prop = &hdev->asic_prop; 2937 int i, j, k; 2938 2939 /* Initialize common user CQ interrupt */ 2940 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, 2941 HL_COMMON_USER_CQ_INTERRUPT_ID, false); 2942 2943 /* Initialize common decoder interrupt */ 2944 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, 2945 HL_COMMON_DEC_INTERRUPT_ID, true); 2946 2947 /* User interrupts structure holds both decoder and user interrupts from various engines. 2948 * We first initialize the decoder interrupts and then we add the user interrupts. 2949 * The only limitation is that the last decoder interrupt id must be smaller 2950 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
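 *
 * Layout note: the first loop below fills hdev->user_interrupt[] with the decoder "normal"
 * interrupts, and the second loop continues with the per-engine user interrupts using the same
 * running j index; gaudi2_enable_msix() mirrors this by starting its user IRQ registration at
 * index prop->user_dec_intr_count.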
2951 */ 2952 2953 /* Initialize decoder interrupts, expose only normal interrupts, 2954 * error interrupts to be handled by driver 2955 */ 2956 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 2957 i += 2, j++) 2958 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true); 2959 2960 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 2961 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false); 2962 } 2963 2964 static inline int gaudi2_get_non_zero_random_int(void) 2965 { 2966 int rand = get_random_u32(); 2967 2968 return rand ? rand : 1; 2969 } 2970 2971 static int gaudi2_sw_init(struct hl_device *hdev) 2972 { 2973 struct asic_fixed_properties *prop = &hdev->asic_prop; 2974 struct gaudi2_device *gaudi2; 2975 int i, rc; 2976 2977 /* Allocate device structure */ 2978 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 2979 if (!gaudi2) 2980 return -ENOMEM; 2981 2982 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 2983 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 2984 continue; 2985 2986 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 2987 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 2988 GAUDI2_EVENT_SIZE); 2989 rc = -EINVAL; 2990 goto free_gaudi2_device; 2991 } 2992 2993 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 2994 } 2995 2996 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 2997 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 2998 2999 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 3000 3001 hdev->asic_specific = gaudi2; 3002 3003 /* Create DMA pool for small allocations. 3004 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 3005 * PI/CI registers allocated from this pool have this restriction 3006 */ 3007 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 3008 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 3009 if (!hdev->dma_pool) { 3010 dev_err(hdev->dev, "failed to create DMA pool\n"); 3011 rc = -ENOMEM; 3012 goto free_gaudi2_device; 3013 } 3014 3015 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3016 if (rc) 3017 goto free_dma_pool; 3018 3019 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3020 if (!hdev->cpu_accessible_dma_pool) { 3021 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3022 rc = -ENOMEM; 3023 goto free_cpu_dma_mem; 3024 } 3025 3026 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3027 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3028 if (rc) { 3029 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3030 rc = -EFAULT; 3031 goto free_cpu_accessible_dma_pool; 3032 } 3033 3034 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3035 &gaudi2->virt_msix_db_dma_addr); 3036 if (!gaudi2->virt_msix_db_cpu_addr) { 3037 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3038 rc = -ENOMEM; 3039 goto free_cpu_accessible_dma_pool; 3040 } 3041 3042 spin_lock_init(&gaudi2->hw_queues_lock); 3043 3044 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3045 &gaudi2->scratchpad_bus_address, 3046 GFP_KERNEL | __GFP_ZERO); 3047 if (!gaudi2->scratchpad_kernel_address) { 3048 rc = -ENOMEM; 3049 goto free_virt_msix_db_mem; 3050 } 3051 3052 gaudi2_user_mapped_blocks_init(hdev); 3053 3054 /* Initialize user interrupts */ 3055 
gaudi2_user_interrupt_setup(hdev); 3056 3057 hdev->supports_coresight = true; 3058 hdev->supports_sync_stream = true; 3059 hdev->supports_cb_mapping = true; 3060 hdev->supports_wait_for_multi_cs = false; 3061 3062 prop->supports_compute_reset = true; 3063 3064 hdev->asic_funcs->set_pci_memory_regions(hdev); 3065 3066 return 0; 3067 3068 free_virt_msix_db_mem: 3069 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3070 free_cpu_accessible_dma_pool: 3071 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3072 free_cpu_dma_mem: 3073 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3074 hdev->cpu_accessible_dma_address); 3075 free_dma_pool: 3076 dma_pool_destroy(hdev->dma_pool); 3077 free_gaudi2_device: 3078 kfree(gaudi2); 3079 return rc; 3080 } 3081 3082 static int gaudi2_sw_fini(struct hl_device *hdev) 3083 { 3084 struct asic_fixed_properties *prop = &hdev->asic_prop; 3085 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3086 3087 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3088 3089 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3090 3091 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3092 hdev->cpu_accessible_dma_address); 3093 3094 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3095 gaudi2->scratchpad_bus_address); 3096 3097 dma_pool_destroy(hdev->dma_pool); 3098 3099 kfree(gaudi2); 3100 3101 return 0; 3102 } 3103 3104 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base) 3105 { 3106 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP | 3107 QM_GLBL_CFG1_CQF_STOP | 3108 QM_GLBL_CFG1_CP_STOP); 3109 3110 /* stop also the ARC */ 3111 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP); 3112 } 3113 3114 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base) 3115 { 3116 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH | 3117 QM_GLBL_CFG1_CQF_FLUSH | 3118 QM_GLBL_CFG1_CP_FLUSH); 3119 } 3120 3121 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base) 3122 { 3123 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH); 3124 } 3125 3126 /** 3127 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters 3128 * 3129 * @hdev: pointer to the habanalabs device structure 3130 * @queue_id: queue whose fence counters should be cleared 3131 * @skip_fence: if true, set the maximum fence value to all fence counters to avoid 3132 * getting stuck on any fence value. Otherwise set all fence 3133 * counters to 0 (standard clear of fence counters) 3134 */ 3135 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id, 3136 bool skip_fence) 3137 { 3138 u32 size, reg_base; 3139 u32 addr, val; 3140 3141 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3142 3143 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET; 3144 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0; 3145 3146 /* 3147 * In case we want to make sure that a QM that is stuck on a fence will 3148 * be released, we should set the fence counter to a value higher than 3149 * the value the QM is waiting for. To comply with a fence counter of 3150 * any value, we set the maximum fence value to all counters 3151 */ 3152 val = skip_fence ?
U32_MAX : 0; 3153 gaudi2_memset_device_lbw(hdev, addr, size, val); 3154 } 3155 3156 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3157 { 3158 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3159 3160 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3161 gaudi2_flush_qman_common(hdev, reg_base); 3162 gaudi2_flush_qman_arc_common(hdev, reg_base); 3163 } 3164 3165 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3166 { 3167 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3168 int dcore, inst; 3169 3170 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3171 goto stop_edma_qmans; 3172 3173 /* Stop CPs of PDMA QMANs */ 3174 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3175 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3176 3177 stop_edma_qmans: 3178 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3179 return; 3180 3181 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3182 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3183 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3184 u32 qm_base; 3185 3186 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3187 continue; 3188 3189 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3190 inst * DCORE_EDMA_OFFSET; 3191 3192 /* Stop CPs of EDMA QMANs */ 3193 gaudi2_stop_qman_common(hdev, qm_base); 3194 } 3195 } 3196 } 3197 3198 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3199 { 3200 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3201 u32 offset, i; 3202 3203 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3204 3205 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3206 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3207 continue; 3208 3209 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3210 } 3211 } 3212 3213 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3214 { 3215 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3216 u32 reg_base; 3217 int i; 3218 3219 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3220 return; 3221 3222 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3223 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3224 continue; 3225 3226 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3227 gaudi2_stop_qman_common(hdev, reg_base); 3228 } 3229 } 3230 3231 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3232 { 3233 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3234 u32 reg_base; 3235 int i; 3236 3237 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3238 return; 3239 3240 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3241 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3242 continue; 3243 3244 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3245 gaudi2_stop_qman_common(hdev, reg_base); 3246 } 3247 } 3248 3249 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3250 { 3251 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3252 u32 reg_base, queue_id; 3253 int i; 3254 3255 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3256 return; 3257 3258 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3259 3260 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3261 if (!(hdev->nic_ports_mask & BIT(i))) 3262 continue; 3263 3264 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3265 gaudi2_stop_qman_common(hdev, reg_base); 3266 } 3267 } 3268 3269 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3270 { 3271 u32 reg_val; 3272 3273 reg_val = 
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3274 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3275 } 3276 3277 static void gaudi2_dma_stall(struct hl_device *hdev) 3278 { 3279 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3280 int dcore, inst; 3281 3282 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3283 goto stall_edma; 3284 3285 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3286 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3287 3288 stall_edma: 3289 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3290 return; 3291 3292 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3293 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3294 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3295 u32 core_base; 3296 3297 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3298 continue; 3299 3300 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3301 inst * DCORE_EDMA_OFFSET; 3302 3303 /* Stall CPs of EDMA QMANs */ 3304 gaudi2_stall_dma_common(hdev, core_base); 3305 } 3306 } 3307 } 3308 3309 static void gaudi2_mme_stall(struct hl_device *hdev) 3310 { 3311 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3312 u32 offset, i; 3313 3314 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3315 3316 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3317 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3318 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3319 } 3320 3321 static void gaudi2_tpc_stall(struct hl_device *hdev) 3322 { 3323 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3324 u32 reg_base; 3325 int i; 3326 3327 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3328 return; 3329 3330 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3331 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3332 continue; 3333 3334 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3335 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3336 } 3337 } 3338 3339 static void gaudi2_rotator_stall(struct hl_device *hdev) 3340 { 3341 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3342 u32 reg_val; 3343 int i; 3344 3345 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3346 return; 3347 3348 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3349 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3350 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3351 3352 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3353 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3354 continue; 3355 3356 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3357 } 3358 } 3359 3360 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3361 { 3362 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3363 } 3364 3365 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3366 { 3367 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3368 int dcore, inst; 3369 3370 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3371 goto stop_edma_qmans; 3372 3373 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3374 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3375 3376 stop_edma_qmans: 3377 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3378 return; 3379 3380 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3381 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3382 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3383 u32 qm_base; 3384 3385 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3386 continue; 3387 3388 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3389 inst * 
DCORE_EDMA_OFFSET; 3390 3391 /* Disable CPs of EDMA QMANs */ 3392 gaudi2_disable_qman_common(hdev, qm_base); 3393 } 3394 } 3395 } 3396 3397 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3398 { 3399 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3400 u32 offset, i; 3401 3402 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3403 3404 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3405 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3406 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3407 } 3408 3409 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3410 { 3411 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3412 u32 reg_base; 3413 int i; 3414 3415 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3416 return; 3417 3418 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3419 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3420 continue; 3421 3422 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3423 gaudi2_disable_qman_common(hdev, reg_base); 3424 } 3425 } 3426 3427 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 3428 { 3429 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3430 u32 reg_base; 3431 int i; 3432 3433 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3434 return; 3435 3436 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3437 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3438 continue; 3439 3440 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3441 gaudi2_disable_qman_common(hdev, reg_base); 3442 } 3443 } 3444 3445 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 3446 { 3447 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3448 u32 reg_base, queue_id; 3449 int i; 3450 3451 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3452 return; 3453 3454 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3455 3456 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3457 if (!(hdev->nic_ports_mask & BIT(i))) 3458 continue; 3459 3460 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3461 gaudi2_disable_qman_common(hdev, reg_base); 3462 } 3463 } 3464 3465 static void gaudi2_enable_timestamp(struct hl_device *hdev) 3466 { 3467 /* Disable the timestamp counter */ 3468 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3469 3470 /* Zero the lower/upper parts of the 64-bit counter */ 3471 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 3472 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 3473 3474 /* Enable the counter */ 3475 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 3476 } 3477 3478 static void gaudi2_disable_timestamp(struct hl_device *hdev) 3479 { 3480 /* Disable the timestamp counter */ 3481 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3482 } 3483 3484 static const char *gaudi2_irq_name(u16 irq_number) 3485 { 3486 switch (irq_number) { 3487 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 3488 return "gaudi2 cpu eq"; 3489 case GAUDI2_IRQ_NUM_COMPLETION: 3490 return "gaudi2 completion"; 3491 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 3492 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 3493 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 3494 return "gaudi2 user completion"; 3495 default: 3496 return "invalid"; 3497 } 3498 } 3499 3500 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 3501 { 3502 int i, irq, relative_idx; 3503 struct hl_dec *dec; 3504 3505 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 3506 irq = pci_irq_vector(hdev->pdev, i); 3507 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3508 3509 dec = hdev->dec + relative_idx / 2; 3510 3511 /* We pass different structures depending on the irq handler. For the abnormal 3512 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3513 * user_interrupt entry 3514 */ 3515 free_irq(irq, ((relative_idx % 2) ? 3516 (void *) dec : 3517 (void *) &hdev->user_interrupt[dec->core_id])); 3518 } 3519 } 3520 3521 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 3522 { 3523 int rc, i, irq_init_cnt, irq, relative_idx; 3524 irq_handler_t irq_handler; 3525 struct hl_dec *dec; 3526 3527 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 3528 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 3529 i++, irq_init_cnt++) { 3530 3531 irq = pci_irq_vector(hdev->pdev, i); 3532 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3533 3534 irq_handler = (relative_idx % 2) ? 3535 hl_irq_handler_dec_abnrm : 3536 hl_irq_handler_user_interrupt; 3537 3538 dec = hdev->dec + relative_idx / 2; 3539 3540 /* We pass different structures depending on the irq handler. For the abnormal 3541 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3542 * user_interrupt entry 3543 */ 3544 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), 3545 ((relative_idx % 2) ? 3546 (void *) dec : 3547 (void *) &hdev->user_interrupt[dec->core_id])); 3548 if (rc) { 3549 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3550 goto free_dec_irqs; 3551 } 3552 } 3553 3554 return 0; 3555 3556 free_dec_irqs: 3557 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 3558 return rc; 3559 } 3560 3561 static int gaudi2_enable_msix(struct hl_device *hdev) 3562 { 3563 struct asic_fixed_properties *prop = &hdev->asic_prop; 3564 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3565 int rc, irq, i, j, user_irq_init_cnt; 3566 irq_handler_t irq_handler; 3567 struct hl_cq *cq; 3568 3569 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 3570 return 0; 3571 3572 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 3573 PCI_IRQ_MSIX); 3574 if (rc < 0) { 3575 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 3576 GAUDI2_MSIX_ENTRIES, rc); 3577 return rc; 3578 } 3579 3580 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3581 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3582 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 3583 if (rc) { 3584 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3585 goto free_irq_vectors; 3586 } 3587 3588 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3589 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 3590 &hdev->event_queue); 3591 if (rc) { 3592 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3593 goto free_completion_irq; 3594 } 3595 3596 rc = gaudi2_dec_enable_msix(hdev); 3597 if (rc) { 3598 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 3599 goto free_event_irq; 3600 } 3601 3602 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 
3603 user_irq_init_cnt < prop->user_interrupt_count; 3604 i++, j++, user_irq_init_cnt++) { 3605 3606 irq = pci_irq_vector(hdev->pdev, i); 3607 irq_handler = hl_irq_handler_user_interrupt; 3608 3609 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); 3610 if (rc) { 3611 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3612 goto free_user_irq; 3613 } 3614 } 3615 3616 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 3617 3618 return 0; 3619 3620 free_user_irq: 3621 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 3622 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 3623 3624 irq = pci_irq_vector(hdev->pdev, i); 3625 free_irq(irq, &hdev->user_interrupt[j]); 3626 } 3627 3628 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3629 3630 free_event_irq: 3631 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3632 free_irq(irq, cq); 3633 3634 free_completion_irq: 3635 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3636 free_irq(irq, cq); 3637 3638 free_irq_vectors: 3639 pci_free_irq_vectors(hdev->pdev); 3640 3641 return rc; 3642 } 3643 3644 static void gaudi2_sync_irqs(struct hl_device *hdev) 3645 { 3646 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3647 int i, j; 3648 int irq; 3649 3650 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3651 return; 3652 3653 /* Wait for all pending IRQs to be finished */ 3654 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 3655 3656 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 3657 irq = pci_irq_vector(hdev->pdev, i); 3658 synchronize_irq(irq); 3659 } 3660 3661 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 3662 i++, j++) { 3663 irq = pci_irq_vector(hdev->pdev, i); 3664 synchronize_irq(irq); 3665 } 3666 3667 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 3668 } 3669 3670 static void gaudi2_disable_msix(struct hl_device *hdev) 3671 { 3672 struct asic_fixed_properties *prop = &hdev->asic_prop; 3673 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3674 struct hl_cq *cq; 3675 int irq, i, j, k; 3676 3677 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3678 return; 3679 3680 gaudi2_sync_irqs(hdev); 3681 3682 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3683 free_irq(irq, &hdev->event_queue); 3684 3685 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3686 3687 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 3688 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 3689 3690 irq = pci_irq_vector(hdev->pdev, i); 3691 free_irq(irq, &hdev->user_interrupt[j]); 3692 } 3693 3694 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3695 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3696 free_irq(irq, cq); 3697 3698 pci_free_irq_vectors(hdev->pdev); 3699 3700 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 3701 } 3702 3703 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 3704 { 3705 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3706 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3707 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3708 int rc; 3709 3710 if (hdev->pldm) 3711 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3712 else 3713 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3714 3715 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3716 dec_bit = dcore_id 
* NUM_OF_DEC_PER_DCORE + dec_id; 3717 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3718 continue; 3719 3720 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; 3721 3722 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0); 3723 3724 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3725 3726 /* Wait till all traffic from decoder stops 3727 * before apply core reset. 3728 */ 3729 rc = hl_poll_timeout( 3730 hdev, 3731 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3732 graceful, 3733 (graceful & graceful_pend_mask), 3734 100, 3735 timeout_usec); 3736 if (rc) 3737 dev_err(hdev->dev, 3738 "Failed to stop traffic from DCORE%d Decoder %d\n", 3739 dcore_id, dec_id); 3740 } 3741 } 3742 3743 static void gaudi2_stop_pcie_dec(struct hl_device *hdev) 3744 { 3745 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3746 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3747 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3748 int rc; 3749 3750 if (hdev->pldm) 3751 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3752 else 3753 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3754 3755 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3756 dec_bit = PCIE_DEC_SHIFT + dec_id; 3757 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3758 continue; 3759 3760 offset = dec_id * PCIE_VDEC_OFFSET; 3761 3762 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0); 3763 3764 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3765 3766 /* Wait till all traffic from decoder stops 3767 * before apply core reset. 3768 */ 3769 rc = hl_poll_timeout( 3770 hdev, 3771 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3772 graceful, 3773 (graceful & graceful_pend_mask), 3774 100, 3775 timeout_usec); 3776 if (rc) 3777 dev_err(hdev->dev, 3778 "Failed to stop traffic from PCIe Decoder %d\n", 3779 dec_id); 3780 } 3781 } 3782 3783 static void gaudi2_stop_dec(struct hl_device *hdev) 3784 { 3785 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3786 int dcore_id; 3787 3788 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0) 3789 return; 3790 3791 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 3792 gaudi2_stop_dcore_dec(hdev, dcore_id); 3793 3794 gaudi2_stop_pcie_dec(hdev); 3795 } 3796 3797 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3798 { 3799 u32 reg_base, reg_val; 3800 3801 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3802 if (run_mode == HL_ENGINE_CORE_RUN) 3803 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 3804 else 3805 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 3806 3807 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 3808 } 3809 3810 static void gaudi2_halt_arcs(struct hl_device *hdev) 3811 { 3812 u16 arc_id; 3813 3814 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 3815 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3816 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 3817 } 3818 } 3819 3820 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3821 { 3822 int rc; 3823 u32 reg_base, val, ack_mask, timeout_usec = 100000; 3824 3825 if (hdev->pldm) 3826 timeout_usec *= 100; 3827 3828 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3829 if (run_mode == HL_ENGINE_CORE_RUN) 3830 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 3831 else 3832 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 3833 3834 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 3835 val, 
((val & ack_mask) == ack_mask), 3836 1000, timeout_usec); 3837 3838 if (!rc) { 3839 /* Clear */ 3840 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 3841 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 3842 } 3843 3844 return rc; 3845 } 3846 3847 static void gaudi2_reset_arcs(struct hl_device *hdev) 3848 { 3849 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3850 u16 arc_id; 3851 3852 if (!gaudi2) 3853 return; 3854 3855 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) 3856 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3857 gaudi2_clr_arc_id_cap(hdev, arc_id); 3858 } 3859 3860 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) 3861 { 3862 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3863 u32 queue_id; 3864 int i; 3865 3866 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3867 return; 3868 3869 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3870 3871 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3872 if (!(hdev->nic_ports_mask & BIT(i))) 3873 continue; 3874 3875 gaudi2_qman_manual_flush_common(hdev, queue_id); 3876 } 3877 } 3878 3879 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 3880 u32 num_cores, u32 core_command) 3881 { 3882 int i, rc; 3883 3884 3885 for (i = 0 ; i < num_cores ; i++) { 3886 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 3887 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 3888 } 3889 3890 for (i = 0 ; i < num_cores ; i++) { 3891 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 3892 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); 3893 3894 if (rc) { 3895 dev_err(hdev->dev, "failed to %s arc: %d\n", 3896 (core_command == HL_ENGINE_CORE_HALT) ? 3897 "HALT" : "RUN", core_ids[i]); 3898 return -1; 3899 } 3900 } 3901 } 3902 3903 return 0; 3904 } 3905 3906 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3907 { 3908 u32 wait_timeout_ms; 3909 3910 if (hdev->pldm) 3911 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC; 3912 else 3913 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; 3914 3915 if (fw_reset) 3916 goto skip_engines; 3917 3918 gaudi2_stop_dma_qmans(hdev); 3919 gaudi2_stop_mme_qmans(hdev); 3920 gaudi2_stop_tpc_qmans(hdev); 3921 gaudi2_stop_rot_qmans(hdev); 3922 gaudi2_stop_nic_qmans(hdev); 3923 msleep(wait_timeout_ms); 3924 3925 gaudi2_halt_arcs(hdev); 3926 gaudi2_dma_stall(hdev); 3927 gaudi2_mme_stall(hdev); 3928 gaudi2_tpc_stall(hdev); 3929 gaudi2_rotator_stall(hdev); 3930 3931 msleep(wait_timeout_ms); 3932 3933 gaudi2_stop_dec(hdev); 3934 3935 /* 3936 * in case of soft reset do a manual flush for QMANs (currently called 3937 * only for NIC QMANs 3938 */ 3939 if (!hard_reset) 3940 gaudi2_nic_qmans_manual_flush(hdev); 3941 3942 gaudi2_disable_dma_qmans(hdev); 3943 gaudi2_disable_mme_qmans(hdev); 3944 gaudi2_disable_tpc_qmans(hdev); 3945 gaudi2_disable_rot_qmans(hdev); 3946 gaudi2_disable_nic_qmans(hdev); 3947 gaudi2_disable_timestamp(hdev); 3948 3949 skip_engines: 3950 if (hard_reset) { 3951 gaudi2_disable_msix(hdev); 3952 return; 3953 } 3954 3955 gaudi2_sync_irqs(hdev); 3956 } 3957 3958 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) 3959 { 3960 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3961 3962 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3963 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3964 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3965 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3966 
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3967 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; 3968 } 3969 3970 static void gaudi2_init_firmware_loader(struct hl_device *hdev) 3971 { 3972 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3973 struct dynamic_fw_load_mgr *dynamic_loader; 3974 struct cpu_dyn_regs *dyn_regs; 3975 3976 /* fill common fields */ 3977 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3978 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE; 3979 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE; 3980 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC; 3981 fw_loader->skip_bmc = false; 3982 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; 3983 fw_loader->dram_bar_id = DRAM_BAR_ID; 3984 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; 3985 3986 /* Here we update initial values for a few specific dynamic regs (as 3987 * before reading the first descriptor from FW those values have to be 3988 * hard-coded). In later stages of the protocol those values will be 3989 * updated automatically by reading the FW descriptor, so the data there 3990 * will always be up-to-date 3991 */ 3992 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3993 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3994 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3995 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3996 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC; 3997 } 3998 3999 static int gaudi2_init_cpu(struct hl_device *hdev) 4000 { 4001 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4002 int rc; 4003 4004 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 4005 return 0; 4006 4007 if (gaudi2->hw_cap_initialized & HW_CAP_CPU) 4008 return 0; 4009 4010 rc = hl_fw_init_cpu(hdev); 4011 if (rc) 4012 return rc; 4013 4014 gaudi2->hw_cap_initialized |= HW_CAP_CPU; 4015 4016 return 0; 4017 } 4018 4019 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 4020 { 4021 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 4022 struct asic_fixed_properties *prop = &hdev->asic_prop; 4023 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4024 struct cpu_dyn_regs *dyn_regs; 4025 struct hl_eq *eq; 4026 u32 status; 4027 int err; 4028 4029 if (!hdev->cpu_queues_enable) 4030 return 0; 4031 4032 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 4033 return 0; 4034 4035 eq = &hdev->event_queue; 4036 4037 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4038 4039 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 4040 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 4041 4042 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 4043 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 4044 4045 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address)); 4046 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address)); 4047 4048 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 4049 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 4050 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 4051 4052 /* Used for EQ CI */ 4053 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 4054 4055 WREG32(mmCPU_IF_PF_PQ_PI, 0); 4056 4057 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 4058 4059 /* Let the ARC know we are ready as it is now handling those queues */ 4060 4061 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 4062
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 4063 4064 err = hl_poll_timeout( 4065 hdev, 4066 mmCPU_IF_QUEUE_INIT, 4067 status, 4068 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4069 1000, 4070 cpu_timeout); 4071 4072 if (err) { 4073 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n"); 4074 return -EIO; 4075 } 4076 4077 /* update FW application security bits */ 4078 if (prop->fw_cpu_boot_dev_sts0_valid) 4079 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4080 4081 if (prop->fw_cpu_boot_dev_sts1_valid) 4082 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4083 4084 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4085 return 0; 4086 } 4087 4088 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4089 u32 queue_id_base) 4090 { 4091 struct hl_hw_queue *q; 4092 u32 pq_id, pq_offset; 4093 4094 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4095 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4096 pq_offset = pq_id * 4; 4097 4098 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4099 lower_32_bits(q->bus_address)); 4100 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4101 upper_32_bits(q->bus_address)); 4102 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4103 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4104 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4105 } 4106 } 4107 4108 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4109 { 4110 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4111 4112 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4113 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4114 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4115 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4116 4117 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4118 cp_offset = cp_id * 4; 4119 4120 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4121 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4122 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4123 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4124 } 4125 4126 /* allow QMANs to accept work from ARC CQF */ 4127 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4128 } 4129 4130 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4131 u32 queue_id_base) 4132 { 4133 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4134 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4135 4136 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4137 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4138 4139 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4140 pq_offset = pq_id * 4; 4141 4142 /* Configure QMAN HBW to scratchpad as it is not needed */ 4143 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4144 lower_32_bits(gaudi2->scratchpad_bus_address)); 4145 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4146 upper_32_bits(gaudi2->scratchpad_bus_address)); 4147 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4148 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4149 4150 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4151 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4152 
WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4153 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4154 } 4155 4156 /* Enable QMAN H/W completion */ 4157 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 4158 } 4159 4160 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4161 { 4162 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4163 u32 sp_reg_addr; 4164 4165 switch (queue_id_base) { 4166 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4167 fallthrough; 4168 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4169 fallthrough; 4170 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4171 fallthrough; 4172 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4173 fallthrough; 4174 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4175 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4176 break; 4177 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4178 fallthrough; 4179 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4180 fallthrough; 4181 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4182 fallthrough; 4183 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4184 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4185 break; 4186 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4187 fallthrough; 4188 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4189 fallthrough; 4190 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4191 fallthrough; 4192 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4193 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4194 break; 4195 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4196 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4197 break; 4198 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4199 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4200 break; 4201 default: 4202 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4203 return 0; 4204 } 4205 4206 return sp_reg_addr; 4207 } 4208 4209 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4210 u32 queue_id_base) 4211 { 4212 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4213 int map_table_entry; 4214 4215 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4216 4217 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4218 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4219 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4220 4221 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4222 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4223 gaudi2_irq_map_table[map_table_entry].cpu_id); 4224 4225 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4226 4227 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4228 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4229 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4230 4231 /* Enable the QMAN channel. 4232 * PDMA QMAN configuration is different, as we do not allow user to 4233 * access some of the CPs. 4234 * PDMA0: CP2/3 are reserved for the ARC usage. 4235 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
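 *
 * Hence the enable value written below is chosen per block: PDMA0 and PDMA1 get their
 * dedicated PDMA0_QMAN_ENABLE / PDMA1_QMAN_ENABLE values, while every other QMAN is enabled
 * with the generic QMAN_ENABLE value.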
4236 */ 4237 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4238 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4239 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4240 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4241 else 4242 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4243 } 4244 4245 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4246 u32 queue_id_base) 4247 { 4248 u32 pq_id; 4249 4250 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4251 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4252 4253 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4254 gaudi2_init_qman_cp(hdev, reg_base); 4255 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4256 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4257 } 4258 4259 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 4260 u32 dma_core_id, bool is_secure) 4261 { 4262 u32 prot, irq_handler_offset; 4263 struct cpu_dyn_regs *dyn_regs; 4264 int map_table_entry; 4265 4266 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 4267 if (is_secure) 4268 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 4269 4270 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 4271 4272 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4273 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 4274 4275 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 4276 lower_32_bits(CFG_BASE + irq_handler_offset)); 4277 4278 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 4279 upper_32_bits(CFG_BASE + irq_handler_offset)); 4280 4281 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 4282 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 4283 gaudi2_irq_map_table[map_table_entry].cpu_id); 4284 4285 /* Enable the DMA channel */ 4286 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 4287 } 4288 4289 static void gaudi2_init_kdma(struct hl_device *hdev) 4290 { 4291 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4292 u32 reg_base; 4293 4294 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 4295 return; 4296 4297 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 4298 4299 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 4300 4301 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 4302 } 4303 4304 static void gaudi2_init_pdma(struct hl_device *hdev) 4305 { 4306 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4307 u32 reg_base; 4308 4309 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 4310 return; 4311 4312 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 4313 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 4314 4315 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 4316 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 4317 4318 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 4319 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 4320 4321 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 4322 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 4323 4324 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 4325 } 4326 4327 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 4328 { 4329 u32 reg_base, base_edma_core_id, base_edma_qman_id; 4330 4331 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 4332 base_edma_qman_id = edma_stream_base[seq]; 4333 4334 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 4335 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 4336 4337 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 4338 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 4339 } 4340 4341 static void gaudi2_init_edma(struct hl_device *hdev) 4342 { 4343 struct asic_fixed_properties *prop = &hdev->asic_prop; 4344 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4345 int dcore, inst; 4346 4347 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 4348 return; 4349 4350 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 4351 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 4352 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 4353 4354 if (!(prop->edma_enabled_mask & BIT(seq))) 4355 continue; 4356 4357 gaudi2_init_edma_instance(hdev, seq); 4358 4359 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 4360 } 4361 } 4362 } 4363 4364 /* 4365 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 4366 * @hdev: pointer to habanalabs device structure. 4367 * @sob_id: sync object ID. 4368 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 4369 * @interrupt_id: interrupt ID. 4370 * 4371 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 4372 * write directly to the HBW host memory of the virtual MSI-X doorbell. 4373 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 4374 * 4375 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 4376 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 4377 * completion, by decrementing the sync object value and re-arming the monitor. 4378 */ 4379 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 4380 u32 first_mon_id, u32 interrupt_id) 4381 { 4382 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 4383 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4384 u64 addr; 4385 u8 mask; 4386 4387 /* Reset the SOB value */ 4388 sob_offset = sob_id * sizeof(u32); 4389 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 4390 4391 /* Configure 3 monitors: 4392 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 4393 * 2. Decrement SOB value by 1. 4394 * 3. Re-arm the master monitor. 
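 *
 * Note: the "decrement by 1" of step 2 is implemented below as an increment-mode write of
 * 0x7FFF with the sign bit set (i.e. -1), and step 3 simply writes the master monitor's ARM
 * register with the same arm value so it is re-armed for the next completion.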
4395 */ 4396 4397 first_mon_offset = first_mon_id * sizeof(u32); 4398 4399 /* 2nd monitor: Decrement SOB value by 1 */ 4400 mon_offset = first_mon_offset + sizeof(u32); 4401 4402 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 4403 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4404 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4405 4406 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 4407 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 4408 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 4409 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4410 4411 /* 3rd monitor: Re-arm the master monitor */ 4412 mon_offset = first_mon_offset + 2 * sizeof(u32); 4413 4414 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 4415 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4416 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4417 4418 sob_group = sob_id / 8; 4419 mask = ~BIT(sob_id & 0x7); 4420 mode = 0; /* comparison mode is "greater than or equal to" */ 4421 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 4422 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 4423 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 4424 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 4425 4426 payload = arm; 4427 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4428 4429 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 4430 mon_offset = first_mon_offset; 4431 4432 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 4433 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 4434 4435 addr = gaudi2->virt_msix_db_dma_addr; 4436 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4437 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4438 4439 payload = interrupt_id; 4440 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4441 4442 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 4443 } 4444 4445 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 4446 { 4447 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 4448 struct asic_fixed_properties *prop = &hdev->asic_prop; 4449 4450 /* Decoder normal/abnormal interrupts */ 4451 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 4452 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 4453 continue; 4454 4455 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4456 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 4457 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 4458 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4459 4460 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4461 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 4462 interrupt_id += 1; 4463 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4464 } 4465 } 4466 4467 static void gaudi2_init_sm(struct hl_device *hdev) 4468 { 4469 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4470 u64 cq_address; 4471 u32 reg_val; 4472 int i; 4473 4474 /* Enable HBW/LBW CQ for completion monitors */ 4475 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4476 
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 4477 4478 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 4479 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4480 4481 /* Enable only HBW CQ for KDMA completion monitor */ 4482 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4483 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4484 4485 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 4486 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 4487 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 4488 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 4489 4490 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 4491 cq_address = 4492 hdev->completion_queue[i].bus_address; 4493 4494 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 4495 lower_32_bits(cq_address)); 4496 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 4497 upper_32_bits(cq_address)); 4498 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 4499 ilog2(HL_CQ_SIZE_IN_BYTES)); 4500 } 4501 4502 /* Configure kernel ASID and MMU BP*/ 4503 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 4504 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 4505 4506 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 4507 gaudi2_prepare_sm_for_virt_msix_db(hdev); 4508 } 4509 4510 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 4511 { 4512 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4513 u32 reg_val; 4514 int i; 4515 4516 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 4517 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 4518 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 4519 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 4520 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 4521 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 4522 4523 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 4524 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 4525 4526 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 4527 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 4528 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 4529 } 4530 } 4531 4532 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 4533 bool config_qman_only) 4534 { 4535 u32 queue_id_base, reg_base; 4536 4537 switch (dcore_id) { 4538 case 0: 4539 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 4540 break; 4541 case 1: 4542 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 4543 break; 4544 case 2: 4545 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 4546 break; 4547 case 3: 4548 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 4549 break; 4550 default: 4551 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 4552 return; 4553 } 4554 4555 if (!config_qman_only) { 4556 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 4557 gaudi2_init_mme_acc(hdev, reg_base); 4558 } 4559 4560 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 4561 gaudi2_init_qman(hdev, reg_base, queue_id_base); 4562 } 4563 4564 static void gaudi2_init_mme(struct hl_device *hdev) 4565 { 4566 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4567 int i; 4568 4569 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 4570 return; 4571 4572 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 4573 gaudi2_init_dcore_mme(hdev, i, false); 4574 4575 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 4576 } 4577 } 4578 4579 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 4580 { 4581 /* Mask arithmetic and QM interrupts in TPC */ 4582 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 4583 4584 /* Set 16 cache lines */ 4585 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 4586 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 4587 } 4588 4589 struct gaudi2_tpc_init_cfg_data { 4590 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 4591 }; 4592 4593 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 4594 u32 offset, struct iterate_module_ctx *ctx) 4595 { 4596 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4597 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 4598 u32 queue_id_base; 4599 u8 seq; 4600 4601 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 4602 4603 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 4604 /* gets last sequence number */ 4605 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 4606 else 4607 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 4608 4609 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 4610 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 4611 4612 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 4613 } 4614 4615 static void gaudi2_init_tpc(struct hl_device *hdev) 4616 { 4617 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4618 struct gaudi2_tpc_init_cfg_data init_cfg_data; 4619 struct iterate_module_ctx tpc_iter; 4620 4621 if (!hdev->asic_prop.tpc_enabled_mask) 4622 return; 4623 4624 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 4625 return; 4626 4627 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 4628 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 4629 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 4630 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 4631 tpc_iter.fn = &gaudi2_init_tpc_config; 4632 tpc_iter.data = &init_cfg_data; 4633 gaudi2_iterate_tpcs(hdev, &tpc_iter); 4634 } 4635 4636 static void gaudi2_init_rotator(struct hl_device *hdev) 4637 { 4638 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4639 u32 i, reg_base, queue_id; 4640 4641 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 4642 4643 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4644 reg_base = gaudi2_qm_blocks_bases[queue_id]; 4645 gaudi2_init_qman(hdev, reg_base, queue_id); 4646 4647 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 4648 } 4649 } 4650 4651 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 4652 { 4653 u32 sob_id; 4654 4655 /* VCMD normal interrupt */ 4656 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4657 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 4658 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4659 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4660 4661 /* VCMD abnormal interrupt */ 4662 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4663 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 4664 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4665 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4666 } 4667 4668 static void gaudi2_init_dec(struct hl_device *hdev) 4669 
{ 4670 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4671 u32 dcore_id, dec_id, dec_bit; 4672 u64 base_addr; 4673 4674 if (!hdev->asic_prop.decoder_enabled_mask) 4675 return; 4676 4677 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 4678 return; 4679 4680 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4681 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4682 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4683 4684 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4685 continue; 4686 4687 base_addr = mmDCORE0_DEC0_CMD_BASE + 4688 BRDG_CTRL_BLOCK_OFFSET + 4689 dcore_id * DCORE_OFFSET + 4690 dec_id * DCORE_VDEC_OFFSET; 4691 4692 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4693 4694 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4695 } 4696 4697 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 4698 dec_bit = PCIE_DEC_SHIFT + dec_id; 4699 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4700 continue; 4701 4702 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 4703 dec_id * DCORE_VDEC_OFFSET; 4704 4705 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4706 4707 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4708 } 4709 } 4710 4711 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 4712 u32 stlb_base, u32 asid, u64 phys_addr) 4713 { 4714 u32 status, timeout_usec; 4715 int rc; 4716 4717 if (hdev->pldm || !hdev->pdev) 4718 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 4719 else 4720 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 4721 4722 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 4723 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 4724 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 4725 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 4726 4727 rc = hl_poll_timeout( 4728 hdev, 4729 stlb_base + STLB_BUSY_OFFSET, 4730 status, 4731 !(status & 0x80000000), 4732 1000, 4733 timeout_usec); 4734 4735 if (rc) { 4736 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 4737 return rc; 4738 } 4739 4740 return 0; 4741 } 4742 4743 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 4744 u32 start_offset, u32 inv_start_val, 4745 u32 flags) 4746 { 4747 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 4748 if (flags & MMU_OP_CLEAR_MEMCACHE) 4749 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 4750 4751 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 4752 return; 4753 4754 WREG32(stlb_base + start_offset, inv_start_val); 4755 } 4756 4757 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 4758 struct gaudi2_cache_invld_params *inv_params) 4759 { 4760 u32 status, timeout_usec, start_offset; 4761 int rc; 4762 4763 timeout_usec = (hdev->pldm) ? 
GAUDI2_PLDM_MMU_TIMEOUT_USEC : 4764 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 4765 4766 /* poll PMMU mem line cache (only needed in mmu range invalidation) */ 4767 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) { 4768 rc = hl_poll_timeout( 4769 hdev, 4770 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 4771 status, 4772 status & 0x1, 4773 1000, 4774 timeout_usec); 4775 4776 if (rc) 4777 return rc; 4778 4779 /* Need to manually reset the status to 0 */ 4780 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0); 4781 } 4782 4783 /* Lower cache does not work with cache lines, hence we can skip its 4784 * invalidation upon map and invalidate only upon unmap 4785 */ 4786 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV) 4787 return 0; 4788 4789 start_offset = inv_params->range_invalidation ? 4790 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET; 4791 4792 rc = hl_poll_timeout( 4793 hdev, 4794 stlb_base + start_offset, 4795 status, 4796 !(status & 0x1), 4797 1000, 4798 timeout_usec); 4799 4800 return rc; 4801 } 4802 4803 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id) 4804 { 4805 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4806 u32 hw_cap; 4807 4808 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id); 4809 4810 if (gaudi2->hw_cap_initialized & hw_cap) 4811 return true; 4812 4813 return false; 4814 } 4815 4816 /* this function shall be called only for HMMUs for which capability bit is set */ 4817 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id) 4818 { 4819 u32 offset; 4820 4821 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 4822 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset); 4823 } 4824 4825 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base, 4826 struct gaudi2_cache_invld_params *inv_params) 4827 { 4828 u32 start_offset; 4829 4830 if (inv_params->range_invalidation) { 4831 /* Set the addresses range 4832 * Note: that the start address we set in register, is not included in 4833 * the range of the invalidation, by design. 4834 * that's why we need to set lower address than the one we actually 4835 * want to be included in the range invalidation. 
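	 * For example: to invalidate a range that begins at VA X, X - 1 is programmed
	 * below as the range start, while the end address is written unmodified.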
4836 */ 4837 u64 start = inv_params->start_va - 1; 4838 4839 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 4840 4841 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 4842 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 4843 4844 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 4845 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 4846 4847 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 4848 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 4849 4850 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 4851 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 4852 } else { 4853 start_offset = STLB_INV_ALL_START_OFFSET; 4854 } 4855 4856 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 4857 inv_params->inv_start_val, inv_params->flags); 4858 } 4859 4860 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 4861 int dcore_id, int hmmu_id, 4862 struct gaudi2_cache_invld_params *inv_params) 4863 { 4864 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4865 4866 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 4867 } 4868 4869 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 4870 int dcore_id, int hmmu_id, 4871 struct gaudi2_cache_invld_params *inv_params) 4872 { 4873 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4874 4875 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 4876 } 4877 4878 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 4879 struct gaudi2_cache_invld_params *inv_params) 4880 { 4881 int dcore_id, hmmu_id; 4882 4883 /* first send all invalidation commands */ 4884 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4885 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4886 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4887 continue; 4888 4889 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 4890 } 4891 } 4892 4893 /* next, poll all invalidations status */ 4894 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4895 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4896 int rc; 4897 4898 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4899 continue; 4900 4901 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 4902 inv_params); 4903 if (rc) 4904 return rc; 4905 } 4906 } 4907 4908 return 0; 4909 } 4910 4911 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 4912 { 4913 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4914 struct gaudi2_cache_invld_params invld_params; 4915 int rc = 0; 4916 4917 if (hdev->reset_info.hard_reset_pending) 4918 return rc; 4919 4920 invld_params.range_invalidation = false; 4921 invld_params.inv_start_val = 1; 4922 4923 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4924 invld_params.flags = flags; 4925 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4926 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4927 &invld_params); 4928 } else if (flags & MMU_OP_PHYS_PACK) { 4929 invld_params.flags = 0; 4930 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4931 } 4932 4933 return rc; 4934 } 4935 4936 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 4937 u32 flags, u32 asid, u64 va, u64 size) 4938 { 4939 struct gaudi2_cache_invld_params invld_params = {0}; 4940 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4941 u64 start_va, end_va; 4942 u32 
inv_start_val; 4943 int rc = 0; 4944 4945 if (hdev->reset_info.hard_reset_pending) 4946 return 0; 4947 4948 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 4949 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 4950 asid << MMU_RANGE_INV_ASID_SHIFT); 4951 start_va = va; 4952 end_va = start_va + size; 4953 4954 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4955 /* As range invalidation does not support zero address we will 4956 * do full invalidation in this case 4957 */ 4958 if (start_va) { 4959 invld_params.range_invalidation = true; 4960 invld_params.start_va = start_va; 4961 invld_params.end_va = end_va; 4962 invld_params.inv_start_val = inv_start_val; 4963 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 4964 } else { 4965 invld_params.range_invalidation = false; 4966 invld_params.inv_start_val = 1; 4967 invld_params.flags = flags; 4968 } 4969 4970 4971 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4972 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4973 &invld_params); 4974 if (rc) 4975 return rc; 4976 4977 } else if (flags & MMU_OP_PHYS_PACK) { 4978 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 4979 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 4980 invld_params.inv_start_val = inv_start_val; 4981 invld_params.flags = flags; 4982 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4983 } 4984 4985 return rc; 4986 } 4987 4988 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 4989 { 4990 struct asic_fixed_properties *prop = &hdev->asic_prop; 4991 u64 hop0_addr; 4992 u32 asid, max_asid = prop->max_asid; 4993 int rc; 4994 4995 /* it takes too much time to init all of the ASIDs on palladium */ 4996 if (hdev->pldm) 4997 max_asid = min((u32) 8, max_asid); 4998 4999 for (asid = 0 ; asid < max_asid ; asid++) { 5000 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 5001 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 5002 if (rc) { 5003 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 5004 return rc; 5005 } 5006 } 5007 5008 return 0; 5009 } 5010 5011 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5012 { 5013 u32 status, timeout_usec; 5014 int rc; 5015 5016 if (hdev->pldm || !hdev->pdev) 5017 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5018 else 5019 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5020 5021 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5022 5023 rc = hl_poll_timeout( 5024 hdev, 5025 stlb_base + STLB_SRAM_INIT_OFFSET, 5026 status, 5027 !status, 5028 1000, 5029 timeout_usec); 5030 5031 if (rc) 5032 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5033 5034 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5035 if (rc) 5036 return rc; 5037 5038 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5039 5040 rc = hl_poll_timeout( 5041 hdev, 5042 stlb_base + STLB_INV_ALL_START_OFFSET, 5043 status, 5044 !status, 5045 1000, 5046 timeout_usec); 5047 5048 if (rc) 5049 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5050 5051 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5052 5053 return rc; 5054 } 5055 5056 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5057 { 5058 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5059 u32 mmu_base, stlb_base; 5060 int rc; 5061 5062 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5063 return 0; 5064 5065 mmu_base = mmPMMU_HBW_MMU_BASE; 5066 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5067 5068 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5069 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5070 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5071 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5072 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5073 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5074 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5075 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5076 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5077 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5078 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5079 5080 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5081 5082 if (PAGE_SIZE == SZ_64K) { 5083 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5084 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5085 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5086 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5087 FIELD_PREP( 5088 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5089 1), 5090 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5091 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5092 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5093 } 5094 5095 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5096 5097 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5098 if (rc) 5099 return rc; 5100 5101 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5102 5103 return 0; 5104 } 5105 5106 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5107 int hmmu_id) 5108 { 5109 struct asic_fixed_properties *prop = &hdev->asic_prop; 5110 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5111 u32 offset, mmu_base, stlb_base, hw_cap; 5112 u8 dmmu_seq; 5113 int rc; 5114 5115 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5116 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5117 5118 /* 5119 * return if DMMU is already initialized or if it's not out of 5120 * isolation (due to cluster binning) 5121 */ 5122 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5123 return 0; 5124 5125 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5126 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5127 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5128 5129 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5130 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5131 5132 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5133 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5134 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5135 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5136 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5137 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5138 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5139 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5140 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5141 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5142 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5143 5144 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5145 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5146 5147 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5148 5149 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5150 if (rc) 5151 return rc; 5152 5153 gaudi2->hw_cap_initialized |= hw_cap; 5154 5155 return 0; 5156 } 5157 5158 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5159 { 5160 int rc, dcore_id, hmmu_id; 5161 5162 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5163 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5164 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5165 if (rc) 5166 return rc; 5167 } 5168 5169 return 0; 5170 } 5171 5172 static int gaudi2_mmu_init(struct hl_device *hdev) 5173 { 5174 int rc; 5175 5176 rc = gaudi2_pci_mmu_init(hdev); 5177 if (rc) 5178 return rc; 5179 5180 rc = gaudi2_hbm_mmu_init(hdev); 5181 if (rc) 5182 return rc; 5183 5184 return 0; 5185 } 5186 5187 static int gaudi2_hw_init(struct hl_device *hdev) 5188 { 5189 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5190 int rc; 5191 5192 /* Let's mark in the H/W that we have reached this point. We check 5193 * this value in the reset_before_init function to understand whether 5194 * we need to reset the chip before doing H/W init. This register is 5195 * cleared by the H/W upon H/W reset 5196 */ 5197 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5198 5199 /* Perform read from the device to make sure device is up */ 5200 RREG32(mmHW_STATE); 5201 5202 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5203 * So we set it here and if anyone tries to move it later to 5204 * a different address, there will be an error 5205 */ 5206 if (hdev->asic_prop.iatu_done_by_fw) 5207 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5208 5209 /* 5210 * Before pushing u-boot/linux to device, need to set the hbm bar to 5211 * base address of dram 5212 */ 5213 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5214 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5215 return -EIO; 5216 } 5217 5218 rc = gaudi2_init_cpu(hdev); 5219 if (rc) { 5220 dev_err(hdev->dev, "failed to initialize CPU\n"); 5221 return rc; 5222 } 5223 5224 gaudi2_init_scrambler_hbm(hdev); 5225 gaudi2_init_kdma(hdev); 5226 5227 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5228 if (rc) { 5229 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5230 return rc; 5231 } 5232 5233 rc = gaudi2->cpucp_info_get(hdev); 5234 if (rc) { 5235 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5236 return rc; 5237 } 5238 5239 rc = gaudi2_mmu_init(hdev); 5240 if (rc) 5241 return rc; 5242 5243 gaudi2_init_pdma(hdev); 5244 gaudi2_init_edma(hdev); 5245 gaudi2_init_sm(hdev); 5246 gaudi2_init_tpc(hdev); 5247 gaudi2_init_mme(hdev); 5248 gaudi2_init_rotator(hdev); 5249 gaudi2_init_dec(hdev); 5250 gaudi2_enable_timestamp(hdev); 5251 5252 rc = gaudi2_coresight_init(hdev); 5253 if (rc) 5254 goto disable_queues; 5255 5256 rc = gaudi2_enable_msix(hdev); 5257 if (rc) 5258 goto disable_queues; 5259 5260 /* Perform read from the device to flush all configuration */ 5261 RREG32(mmHW_STATE); 5262 5263 return 0; 5264 5265 disable_queues: 5266 gaudi2_disable_dma_qmans(hdev); 5267 gaudi2_disable_mme_qmans(hdev); 5268 gaudi2_disable_tpc_qmans(hdev); 5269 gaudi2_disable_rot_qmans(hdev); 5270 gaudi2_disable_nic_qmans(hdev); 5271 5272 gaudi2_disable_timestamp(hdev); 5273 5274 return rc; 5275 } 5276 5277 /** 5278 * gaudi2_send_hard_reset_cmd - common function to handle reset 5279 * 5280 * 
 * @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible reset scenarios.
 * It considers whether the reset is handled by the driver or by the FW, and which
 * FW components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case where the failure happened while loading the CPU
	 * management app and the driver did not detect any failure while loading the FW.
	 * In that scenario the driver would send only HALT_MACHINE, and no one would
	 * respond to the request since the FW is already back in preboot and cannot
	 * handle such a command.
	 * The next time the management app loads, it checks the events register, which
	 * still holds the halt indication, and reboots the device.
	 * The solution is to let preboot clear all relevant registers before the next
	 * boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/boot-fit exists, this write to the SP can be interpreted in two ways:
	 * 1. FW reset: the FW initiates the reset sequence
	 * 2. driver reset: the FW starts the HALT sequence (the preparations for the
	 *                  reset but not the reset itself, as it is not implemented
	 *                  on its side) and the LKD waits to let the FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot only (without Linux/boot-fit) we can
	 * communicate only by using the COMMS commands to issue halt/reset.
	 *
	 * When working with Linux/boot-fit, this is a hail-mary attempt to revive the
	 * card in the small chance that the F/W has experienced a watchdog event,
	 * which caused it to return back to preboot.
	 * In that case, triggering the reset through the GIC won't help, so we need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was a heartbeat failure, because that would
	 * be the indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got a heartbeat failure,
	 * this does no damage.
	 */

	if (heartbeat_reset || preboot_only || !cpu_initialized) {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}
}

/**
 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @reset_sleep_ms: sleep time in msec after reset
 *
 * This function executes a hard reset, depending on whether the driver or the FW
 * should perform the reset.
 */
static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
{
	if (hdev->asic_prop.hard_reset_done_by_fw) {
		gaudi2_send_hard_reset_cmd(hdev);
		return;
	}

	/* Set device to handle FLR by H/W as we will put the device
	 * CPU to halt mode
	 */
	WREG32(mmPCIE_AUX_FLR_CTRL,
		(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	gaudi2_send_hard_reset_cmd(hdev);

	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}

/**
 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @reset_sleep_ms: sleep time in msec after reset
 * @driver_performs_reset: true if the driver should perform the reset instead of the F/W.
 *
 * This function executes a soft reset, depending on whether the driver or the FW
 * should perform the reset.
 */
static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
						bool driver_performs_reset)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	if (!driver_performs_reset) {
		/* set SP to indicate reset request sent to FW */
		if (dyn_regs->cpu_rst_status)
			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
		else
			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);

		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
		return;
	}

	/* Block access to engines, QMANs and SM during reset; these
	 * RRs will be reconfigured after the soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
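	 * (The two long-range RRs programmed below block the LBW ranges below and
	 * above the PCIE_MSIX block, so only that block remains accessible.)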
5407 */ 5408 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1, 5409 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE); 5410 5411 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2, 5412 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE, 5413 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE); 5414 5415 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1); 5416 } 5417 5418 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms, 5419 u32 poll_timeout_us) 5420 { 5421 int i, rc = 0; 5422 u32 reg_val; 5423 5424 /* without this sleep reset will not work */ 5425 msleep(reset_sleep_ms); 5426 5427 /* We poll the BTM done indication multiple times after reset due to 5428 * a HW errata 'GAUDI2_0300' 5429 */ 5430 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 5431 rc = hl_poll_timeout( 5432 hdev, 5433 mmPSOC_GLOBAL_CONF_BTM_FSM, 5434 reg_val, 5435 reg_val == 0, 5436 1000, 5437 poll_timeout_us); 5438 5439 if (rc) 5440 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val); 5441 } 5442 5443 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us) 5444 { 5445 int i, rc = 0; 5446 u32 reg_val; 5447 5448 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 5449 rc = hl_poll_timeout( 5450 hdev, 5451 mmCPU_RST_STATUS_TO_HOST, 5452 reg_val, 5453 reg_val == CPU_RST_STATUS_SOFT_RST_DONE, 5454 1000, 5455 poll_timeout_us); 5456 5457 if (rc) 5458 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n", 5459 reg_val); 5460 } 5461 5462 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 5463 { 5464 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5465 u32 poll_timeout_us, reset_sleep_ms; 5466 bool driver_performs_reset = false; 5467 5468 if (hdev->pldm) { 5469 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC : 5470 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC; 5471 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC; 5472 } else { 5473 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC; 5474 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC; 5475 } 5476 5477 if (fw_reset) 5478 goto skip_reset; 5479 5480 gaudi2_reset_arcs(hdev); 5481 5482 if (hard_reset) { 5483 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw; 5484 gaudi2_execute_hard_reset(hdev, reset_sleep_ms); 5485 } else { 5486 /* 5487 * As we have to support also work with preboot only (which does not supports 5488 * soft reset) we have to make sure that security is disabled before letting driver 5489 * do the reset. user shall control the BFE flags to avoid asking soft reset in 5490 * secured device with preboot only. 5491 */ 5492 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU && 5493 !hdev->asic_prop.fw_security_enabled); 5494 gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset); 5495 } 5496 5497 skip_reset: 5498 if (driver_performs_reset || hard_reset) 5499 /* 5500 * Instead of waiting for BTM indication we should wait for preboot ready: 5501 * Consider the below scenario: 5502 * 1. FW update is being triggered 5503 * - setting the dirty bit 5504 * 2. hard reset will be triggered due to the dirty bit 5505 * 3. FW initiates the reset: 5506 * - dirty bit cleared 5507 * - BTM indication cleared 5508 * - preboot ready indication cleared 5509 * 4. during hard reset: 5510 * - BTM indication will be set 5511 * - BIST test performed and another reset triggered 5512 * 5. 
only after this reset the preboot will set the preboot ready 5513 * 5514 * when polling on BTM indication alone we can lose sync with FW while trying to 5515 * communicate with FW that is during reset. 5516 * to overcome this we will always wait to preboot ready indication 5517 */ 5518 if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { 5519 msleep(reset_sleep_ms); 5520 hl_fw_wait_preboot_ready(hdev); 5521 } else { 5522 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); 5523 } 5524 else 5525 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 5526 5527 if (!gaudi2) 5528 return; 5529 5530 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 5531 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 5532 5533 /* 5534 * Clear NIC capability mask in order for driver to re-configure 5535 * NIC QMANs. NIC ports will not be re-configured during soft 5536 * reset as we call gaudi2_nic_init only during hard reset 5537 */ 5538 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 5539 5540 if (hard_reset) { 5541 gaudi2->hw_cap_initialized &= 5542 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 5543 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 5544 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 5545 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 5546 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 5547 5548 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 5549 } else { 5550 gaudi2->hw_cap_initialized &= 5551 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 5552 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 5553 HW_CAP_ROT_MASK); 5554 } 5555 } 5556 5557 static int gaudi2_suspend(struct hl_device *hdev) 5558 { 5559 int rc; 5560 5561 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 5562 if (rc) 5563 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 5564 5565 return rc; 5566 } 5567 5568 static int gaudi2_resume(struct hl_device *hdev) 5569 { 5570 return gaudi2_init_iatu(hdev); 5571 } 5572 5573 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 5574 void *cpu_addr, dma_addr_t dma_addr, size_t size) 5575 { 5576 int rc; 5577 5578 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 5579 VM_DONTCOPY | VM_NORESERVE; 5580 5581 #ifdef _HAS_DMA_MMAP_COHERENT 5582 5583 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 5584 if (rc) 5585 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 5586 5587 #else 5588 5589 rc = remap_pfn_range(vma, vma->vm_start, 5590 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 5591 size, vma->vm_page_prot); 5592 if (rc) 5593 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 5594 5595 #endif 5596 5597 return rc; 5598 } 5599 5600 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 5601 { 5602 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5603 u64 hw_cap_mask = 0; 5604 u64 hw_tpc_cap_bit = 0; 5605 u64 hw_nic_cap_bit = 0; 5606 u64 hw_test_cap_bit = 0; 5607 5608 switch (hw_queue_id) { 5609 case GAUDI2_QUEUE_ID_PDMA_0_0: 5610 case GAUDI2_QUEUE_ID_PDMA_0_1: 5611 case GAUDI2_QUEUE_ID_PDMA_1_0: 5612 hw_cap_mask = HW_CAP_PDMA_MASK; 5613 break; 5614 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 5615 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 5616 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 5617 break; 5618 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 5619 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 5620 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 
2); 5621 break; 5622 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 5623 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 5624 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2); 5625 break; 5626 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 5627 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 5628 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 5629 break; 5630 5631 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 5632 hw_test_cap_bit = HW_CAP_MME_SHIFT; 5633 break; 5634 5635 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 5636 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 5637 break; 5638 5639 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 5640 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 5641 break; 5642 5643 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 5644 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 5645 break; 5646 5647 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 5648 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 5649 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 5650 5651 /* special case where cap bit refers to the first queue id */ 5652 if (!hw_tpc_cap_bit) 5653 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 5654 break; 5655 5656 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 5657 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 5658 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 5659 break; 5660 5661 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 5662 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 5663 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 5664 break; 5665 5666 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 5667 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 5668 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 5669 break; 5670 5671 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 5672 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 5673 break; 5674 5675 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 5676 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 5677 break; 5678 5679 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 5680 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 5681 5682 /* special case where cap bit refers to the first queue id */ 5683 if (!hw_nic_cap_bit) 5684 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 5685 break; 5686 5687 case GAUDI2_QUEUE_ID_CPU_PQ: 5688 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 5689 5690 default: 5691 return false; 5692 } 5693 5694 if (hw_tpc_cap_bit) 5695 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 5696 5697 if (hw_nic_cap_bit) 5698 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 5699 5700 if (hw_test_cap_bit) 5701 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 5702 5703 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 5704 } 5705 5706 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 5707 { 5708 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5709 5710 switch (arc_id) { 5711 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 5712 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5713 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 5714 5715 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5716 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5717 5718 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5719 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5720 5721 default: 5722 return false; 5723 } 5724 } 5725 5726 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5727 { 5728 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5729 5730 switch (arc_id) { 5731 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5732 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5733 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 5734 break; 5735 5736 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5737 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5738 break; 5739 5740 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5741 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5742 break; 5743 5744 default: 5745 return; 5746 } 5747 } 5748 5749 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5750 { 5751 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5752 5753 switch (arc_id) { 5754 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5755 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5756 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 5757 break; 5758 5759 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5760 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 5761 break; 5762 5763 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5764 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 5765 break; 5766 5767 default: 5768 return; 5769 } 5770 } 5771 5772 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 5773 { 5774 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5775 u32 pq_offset, reg_base, db_reg_offset, db_value; 5776 5777 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 5778 /* 5779 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 5780 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 5781 * number. 
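		 * For example, the third PQ of a QMAN (internal index 2) gets
		 * pq_offset = 2 * 4 = 8, i.e. the doorbell write lands 8 bytes
		 * above QM_PQ_PI_0.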
5782 */ 5783 pq_offset = (hw_queue_id & 0x3) * 4; 5784 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 5785 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 5786 } else { 5787 db_reg_offset = mmCPU_IF_PF_PQ_PI; 5788 } 5789 5790 db_value = pi; 5791 5792 /* ring the doorbell */ 5793 WREG32(db_reg_offset, db_value); 5794 5795 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 5796 /* make sure device CPU will read latest data from host */ 5797 mb(); 5798 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 5799 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 5800 } 5801 } 5802 5803 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 5804 { 5805 __le64 *pbd = (__le64 *) bd; 5806 5807 /* The QMANs are on the host memory so a simple copy suffice */ 5808 pqe[0] = pbd[0]; 5809 pqe[1] = pbd[1]; 5810 } 5811 5812 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 5813 dma_addr_t *dma_handle, gfp_t flags) 5814 { 5815 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 5816 } 5817 5818 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 5819 void *cpu_addr, dma_addr_t dma_handle) 5820 { 5821 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 5822 } 5823 5824 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 5825 u32 timeout, u64 *result) 5826 { 5827 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5828 5829 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 5830 if (result) 5831 *result = 0; 5832 return 0; 5833 } 5834 5835 if (!timeout) 5836 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 5837 5838 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 5839 } 5840 5841 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 5842 gfp_t mem_flags, dma_addr_t *dma_handle) 5843 { 5844 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 5845 return NULL; 5846 5847 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 5848 } 5849 5850 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 5851 { 5852 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 5853 } 5854 5855 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 5856 dma_addr_t *dma_handle) 5857 { 5858 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 5859 } 5860 5861 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 5862 { 5863 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 5864 } 5865 5866 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 5867 enum dma_data_direction dir) 5868 { 5869 dma_addr_t dma_addr; 5870 5871 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 5872 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 5873 return 0; 5874 5875 return dma_addr; 5876 } 5877 5878 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 5879 enum dma_data_direction dir) 5880 { 5881 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 5882 } 5883 5884 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 5885 { 5886 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5887 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5888 5889 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 5890 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5891 return -EINVAL; 5892 } 5893 5894 
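	/*
	 * The checks below accept a CB that resides in SRAM, in DRAM, in a range
	 * covered by the device MMU (DMMU/PMMU), or, as a last resort, a valid host
	 * physical address when the device is not behind an IOMMU.
	 */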
/* Just check if CB address is valid */ 5895 5896 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5897 parser->user_cb_size, 5898 asic_prop->sram_user_base_address, 5899 asic_prop->sram_end_address)) 5900 return 0; 5901 5902 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5903 parser->user_cb_size, 5904 asic_prop->dram_user_base_address, 5905 asic_prop->dram_end_address)) 5906 return 0; 5907 5908 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 5909 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5910 parser->user_cb_size, 5911 asic_prop->dmmu.start_addr, 5912 asic_prop->dmmu.end_addr)) 5913 return 0; 5914 5915 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 5916 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5917 parser->user_cb_size, 5918 asic_prop->pmmu.start_addr, 5919 asic_prop->pmmu.end_addr) || 5920 hl_mem_area_inside_range( 5921 (u64) (uintptr_t) parser->user_cb, 5922 parser->user_cb_size, 5923 asic_prop->pmmu_huge.start_addr, 5924 asic_prop->pmmu_huge.end_addr)) 5925 return 0; 5926 5927 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 5928 if (!hdev->pdev) 5929 return 0; 5930 5931 if (!device_iommu_mapped(&hdev->pdev->dev)) 5932 return 0; 5933 } 5934 5935 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 5936 parser->user_cb, parser->user_cb_size); 5937 5938 return -EFAULT; 5939 } 5940 5941 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5942 { 5943 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5944 5945 if (!parser->is_kernel_allocated_cb) 5946 return gaudi2_validate_cb_address(hdev, parser); 5947 5948 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5949 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 5950 return -EINVAL; 5951 } 5952 5953 return 0; 5954 } 5955 5956 static int gaudi2_send_heartbeat(struct hl_device *hdev) 5957 { 5958 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5959 5960 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 5961 return 0; 5962 5963 return hl_fw_send_heartbeat(hdev); 5964 } 5965 5966 /* This is an internal helper function, used to update the KDMA mmu props. 5967 * Should be called with a proper kdma lock. 
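 * (It programs the ARC_FARM_KDMA AXUSER ASID/MMU-BP registers directly, so
 * concurrent callers must be serialized by that lock.)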
5968 */ 5969 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 5970 bool mmu_bypass, u32 asid) 5971 { 5972 u32 rw_asid, rw_mmu_bp; 5973 5974 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 5975 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 5976 5977 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 5978 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 5979 5980 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 5981 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 5982 } 5983 5984 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 5985 u32 mon_payload, u32 sync_value) 5986 { 5987 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 5988 u8 mask; 5989 5990 sob_offset = sob_id * 4; 5991 mon_offset = mon_id * 4; 5992 5993 /* Reset the SOB value */ 5994 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 5995 5996 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 5997 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 5998 5999 /* Configure this address with CS index because CQ_EN is set */ 6000 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 6001 6002 sync_group_id = sob_id / 8; 6003 mask = ~(1 << (sob_id & 0x7)); 6004 mode = 1; /* comparison mode is "equal to" */ 6005 6006 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 6007 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 6008 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 6009 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 6010 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 6011 } 6012 6013 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6014 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6015 u64 src_addr, u64 dst_addr, 6016 u32 size, bool is_memset) 6017 { 6018 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6019 struct hl_cq_entry *cq_base; 6020 struct hl_cq *cq; 6021 u64 comp_addr; 6022 int rc; 6023 6024 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6025 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6026 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6027 6028 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6029 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6030 6031 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6032 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6033 6034 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6035 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6036 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6037 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6038 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6039 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6040 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6041 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6042 6043 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6044 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6045 6046 if (is_memset) 6047 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6048 6049 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6050 6051 /* Wait for completion */ 6052 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6053 cq_base = 
cq->kernel_address; 6054 polling_addr = (u32 *)&cq_base[cq->ci]; 6055 6056 if (hdev->pldm) 6057 /* for each 1MB 20 second of timeout */ 6058 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6059 else 6060 timeout = KDMA_TIMEOUT_USEC; 6061 6062 /* Polling */ 6063 rc = hl_poll_timeout_memory( 6064 hdev, 6065 polling_addr, 6066 status, 6067 (status == 1), 6068 1000, 6069 timeout, 6070 true); 6071 6072 *polling_addr = 0; 6073 6074 if (rc) { 6075 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6076 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6077 return rc; 6078 } 6079 6080 cq->ci = hl_cq_inc_ptr(cq->ci); 6081 6082 return 0; 6083 } 6084 6085 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6086 { 6087 u32 i; 6088 6089 for (i = 0 ; i < size ; i += sizeof(u32)) 6090 WREG32(addr + i, val); 6091 } 6092 6093 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6094 { 6095 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6096 6097 if (enable) { 6098 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6099 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6100 } else { 6101 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6102 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6103 } 6104 } 6105 6106 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) 6107 { 6108 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 6109 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6110 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; 6111 struct packet_msg_short *msg_short_pkt; 6112 dma_addr_t pkt_dma_addr; 6113 size_t pkt_size; 6114 int rc; 6115 6116 if (hdev->pldm) 6117 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6118 else 6119 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6120 6121 pkt_size = sizeof(*msg_short_pkt); 6122 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); 6123 if (!msg_short_pkt) { 6124 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", 6125 hw_queue_id); 6126 return -ENOMEM; 6127 } 6128 6129 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6130 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6131 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6132 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6133 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6134 6135 msg_short_pkt->value = cpu_to_le32(sob_val); 6136 msg_short_pkt->ctl = cpu_to_le32(tmp); 6137 6138 /* Reset the SOB value */ 6139 WREG32(sob_addr, 0); 6140 6141 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 6142 if (rc) { 6143 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", 6144 hw_queue_id); 6145 goto free_pkt; 6146 } 6147 6148 rc = hl_poll_timeout( 6149 hdev, 6150 sob_addr, 6151 tmp, 6152 (tmp == sob_val), 6153 1000, 6154 timeout_usec); 6155 6156 if (rc == -ETIMEDOUT) { 6157 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 6158 hw_queue_id, tmp); 6159 rc = -EIO; 6160 } 6161 6162 /* Reset the SOB value */ 6163 WREG32(sob_addr, 0); 6164 6165 free_pkt: 6166 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); 6167 return rc; 6168 } 6169 6170 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6171 { 6172 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6173 6174 /* 6175 * check capability here as send_cpu_message() won't update the result 6176 * value if no capability 6177 */ 6178 if 
(!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6179 return 0; 6180 6181 return hl_fw_test_cpu_queue(hdev); 6182 } 6183 6184 static int gaudi2_test_queues(struct hl_device *hdev) 6185 { 6186 int i, rc, ret_val = 0; 6187 6188 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6189 if (!gaudi2_is_queue_enabled(hdev, i)) 6190 continue; 6191 6192 gaudi2_qman_set_test_mode(hdev, i, true); 6193 rc = gaudi2_test_queue(hdev, i); 6194 gaudi2_qman_set_test_mode(hdev, i, false); 6195 6196 if (rc) { 6197 ret_val = -EINVAL; 6198 goto done; 6199 } 6200 } 6201 6202 rc = gaudi2_test_cpu_queue(hdev); 6203 if (rc) { 6204 ret_val = -EINVAL; 6205 goto done; 6206 } 6207 6208 done: 6209 return ret_val; 6210 } 6211 6212 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6213 { 6214 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6215 size_t irq_arr_size; 6216 6217 /* TODO: missing gaudi2_nic_resume. 6218 * Until implemented nic_hw_cap_initialized will remain zeroed 6219 */ 6220 gaudi2_init_arcs(hdev); 6221 gaudi2_scrub_arcs_dccm(hdev); 6222 gaudi2_init_security(hdev); 6223 6224 /* Unmask all IRQs since some could have been received during the soft reset */ 6225 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 6226 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 6227 } 6228 6229 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 6230 struct iterate_module_ctx *ctx) 6231 { 6232 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 6233 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 6234 bool is_eng_idle; 6235 int engine_idx; 6236 6237 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 6238 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 6239 else 6240 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 6241 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 6242 6243 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 6244 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 6245 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 6246 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 6247 6248 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6249 IS_TPC_IDLE(tpc_cfg_sts); 6250 *(idle_data->is_idle) &= is_eng_idle; 6251 6252 if (idle_data->mask && !is_eng_idle) 6253 set_bit(engine_idx, idle_data->mask); 6254 6255 if (idle_data->e) 6256 hl_engine_data_sprintf(idle_data->e, 6257 idle_data->tpc_fmt, dcore, inst, 6258 is_eng_idle ? 
"Y" : "N", 6259 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 6260 } 6261 6262 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6263 struct engines_data *e) 6264 { 6265 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, 6266 mme_arch_sts, dec_swreg15, dec_enabled_bit; 6267 struct asic_fixed_properties *prop = &hdev->asic_prop; 6268 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n"; 6269 unsigned long *mask = (unsigned long *) mask_arr; 6270 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n"; 6271 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 6272 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 6273 const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n"; 6274 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 6275 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 6276 bool is_idle = true, is_eng_idle; 6277 u64 offset; 6278 6279 struct gaudi2_tpc_idle_data tpc_idle_data = { 6280 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 6281 .e = e, 6282 .mask = mask, 6283 .is_idle = &is_idle, 6284 }; 6285 struct iterate_module_ctx tpc_iter = { 6286 .fn = &gaudi2_is_tpc_engine_idle, 6287 .data = &tpc_idle_data, 6288 }; 6289 6290 int engine_idx, i, j; 6291 6292 /* EDMA, Two engines per Dcore */ 6293 if (e) 6294 hl_engine_data_sprintf(e, 6295 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6296 "---- ---- ------- ------------ ----------------------\n"); 6297 6298 for (i = 0; i < NUM_OF_DCORES; i++) { 6299 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 6300 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 6301 6302 if (!(prop->edma_enabled_mask & BIT(seq))) 6303 continue; 6304 6305 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 6306 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6307 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 6308 6309 dma_core_idle_ind_mask = 6310 RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset); 6311 6312 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 6313 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 6314 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 6315 6316 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6317 IS_DMA_IDLE(dma_core_idle_ind_mask); 6318 is_idle &= is_eng_idle; 6319 6320 if (mask && !is_eng_idle) 6321 set_bit(engine_idx, mask); 6322 6323 if (e) 6324 hl_engine_data_sprintf(e, edma_fmt, i, j, 6325 is_eng_idle ? "Y" : "N", 6326 qm_glbl_sts0, 6327 dma_core_idle_ind_mask); 6328 } 6329 } 6330 6331 /* PDMA, Two engines in Full chip */ 6332 if (e) 6333 hl_engine_data_sprintf(e, 6334 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6335 "---- ------- ------------ ----------------------\n"); 6336 6337 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6338 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 6339 offset = i * PDMA_OFFSET; 6340 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset); 6341 6342 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 6343 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 6344 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 6345 6346 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6347 IS_DMA_IDLE(dma_core_idle_ind_mask); 6348 is_idle &= is_eng_idle; 6349 6350 if (mask && !is_eng_idle) 6351 set_bit(engine_idx, mask); 6352 6353 if (e) 6354 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? 
"Y" : "N", 6355 qm_glbl_sts0, dma_core_idle_ind_mask); 6356 } 6357 6358 /* NIC, twelve macros in Full chip */ 6359 if (e && hdev->nic_ports_mask) 6360 hl_engine_data_sprintf(e, 6361 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6362 "--- ------- ------------ ----------\n"); 6363 6364 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6365 if (!(i & 1)) 6366 offset = i / 2 * NIC_OFFSET; 6367 else 6368 offset += NIC_QM_OFFSET; 6369 6370 if (!(hdev->nic_ports_mask & BIT(i))) 6371 continue; 6372 6373 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 6374 6375 6376 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 6377 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 6378 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 6379 6380 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6381 is_idle &= is_eng_idle; 6382 6383 if (mask && !is_eng_idle) 6384 set_bit(engine_idx, mask); 6385 6386 if (e) 6387 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", 6388 qm_glbl_sts0, qm_cgm_sts); 6389 } 6390 6391 if (e) 6392 hl_engine_data_sprintf(e, 6393 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6394 "--- ---- ------- ------------ ---------------\n"); 6395 /* MME, one per Dcore */ 6396 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6397 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 6398 offset = i * DCORE_OFFSET; 6399 6400 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 6401 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 6402 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 6403 6404 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6405 is_idle &= is_eng_idle; 6406 6407 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 6408 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 6409 is_idle &= is_eng_idle; 6410 6411 if (e) 6412 hl_engine_data_sprintf(e, mme_fmt, i, "N", 6413 is_eng_idle ? "Y" : "N", 6414 qm_glbl_sts0, 6415 mme_arch_sts); 6416 6417 if (mask && !is_eng_idle) 6418 set_bit(engine_idx, mask); 6419 } 6420 6421 /* 6422 * TPC 6423 */ 6424 if (e && prop->tpc_enabled_mask) 6425 hl_engine_data_sprintf(e, 6426 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" 6427 "---- --- -------- ------------ ---------- ----------------------\n"); 6428 6429 gaudi2_iterate_tpcs(hdev, &tpc_iter); 6430 6431 /* Decoders, two each Dcore and two shared PCIe decoders */ 6432 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6433 hl_engine_data_sprintf(e, 6434 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 6435 "---- --- ------- ---------------\n"); 6436 6437 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6438 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 6439 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 6440 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 6441 continue; 6442 6443 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 6444 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6445 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 6446 6447 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 6448 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6449 is_idle &= is_eng_idle; 6450 6451 if (mask && !is_eng_idle) 6452 set_bit(engine_idx, mask); 6453 6454 if (e) 6455 hl_engine_data_sprintf(e, dec_fmt, i, j, 6456 is_eng_idle ? 
"Y" : "N", dec_swreg15); 6457 } 6458 } 6459 6460 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6461 hl_engine_data_sprintf(e, 6462 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 6463 "-------- ------- ---------------\n"); 6464 6465 /* Check shared(PCIe) decoders */ 6466 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 6467 dec_enabled_bit = PCIE_DEC_SHIFT + i; 6468 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 6469 continue; 6470 6471 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 6472 offset = i * DCORE_DEC_OFFSET; 6473 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 6474 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6475 is_idle &= is_eng_idle; 6476 6477 if (mask && !is_eng_idle) 6478 set_bit(engine_idx, mask); 6479 6480 if (e) 6481 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 6482 is_eng_idle ? "Y" : "N", dec_swreg15); 6483 } 6484 6485 if (e) 6486 hl_engine_data_sprintf(e, 6487 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 6488 "---- ---- ------- ------------ ---------- -------------\n"); 6489 6490 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6491 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 6492 6493 offset = i * ROT_OFFSET; 6494 6495 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 6496 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 6497 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 6498 6499 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6500 is_idle &= is_eng_idle; 6501 6502 if (mask && !is_eng_idle) 6503 set_bit(engine_idx, mask); 6504 6505 if (e) 6506 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 6507 qm_glbl_sts0, qm_cgm_sts, "-"); 6508 } 6509 6510 return is_idle; 6511 } 6512 6513 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 6514 __acquires(&gaudi2->hw_queues_lock) 6515 { 6516 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6517 6518 spin_lock(&gaudi2->hw_queues_lock); 6519 } 6520 6521 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 6522 __releases(&gaudi2->hw_queues_lock) 6523 { 6524 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6525 6526 spin_unlock(&gaudi2->hw_queues_lock); 6527 } 6528 6529 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 6530 { 6531 return hdev->pdev->device; 6532 } 6533 6534 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 6535 { 6536 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6537 6538 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6539 return 0; 6540 6541 return hl_fw_get_eeprom_data(hdev, data, max_size); 6542 } 6543 6544 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 6545 { 6546 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 6547 } 6548 6549 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 6550 { 6551 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6552 6553 if (aggregate) { 6554 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 6555 return gaudi2->events_stat_aggregate; 6556 } 6557 6558 *size = (u32) sizeof(gaudi2->events_stat); 6559 return gaudi2->events_stat; 6560 } 6561 6562 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 6563 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6564 { 6565 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 6566 dcore_vdec_id + DCORE_OFFSET * dcore_id; 6567 6568 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6569 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6570 6571 
WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6572 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6573 6574 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6575 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6576 6577 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6578 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6579 6580 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6581 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6582 } 6583 6584 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 6585 { 6586 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6587 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6588 struct asic_fixed_properties *prop = &hdev->asic_prop; 6589 u32 dcore_offset = dcore_id * DCORE_OFFSET; 6590 u32 vdec_id, i, ports_offset, reg_val; 6591 u8 edma_seq_base; 6592 6593 /* EDMA */ 6594 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 6595 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 6596 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6597 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6598 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6599 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6600 } 6601 6602 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 6603 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6604 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6605 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6606 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6607 } 6608 6609 /* Sync Mngr */ 6610 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 6611 /* 6612 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 6613 * for any access type 6614 */ 6615 if (dcore_id > 0) { 6616 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 6617 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 6618 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 6619 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 6620 } 6621 6622 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 6623 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 6624 6625 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 6626 ports_offset = i * DCORE_MME_SBTE_OFFSET; 6627 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 6628 dcore_offset + ports_offset, 0); 6629 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 6630 dcore_offset + ports_offset, rw_asid); 6631 } 6632 6633 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 6634 ports_offset = i * DCORE_MME_WB_OFFSET; 6635 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 6636 dcore_offset + ports_offset, 0); 6637 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 6638 dcore_offset + ports_offset, rw_asid); 6639 } 6640 6641 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6642 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6643 6644 /* 6645 * Decoders 6646 */ 6647 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 6648 if (prop->decoder_enabled_mask & 
BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 6649 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 6650 } 6651 } 6652 6653 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 6654 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6655 { 6656 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 6657 6658 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6659 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6660 6661 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6662 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6663 6664 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6665 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6666 6667 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6668 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6669 6670 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6671 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6672 } 6673 6674 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 6675 u32 rw_asid, u32 rw_mmu_bp) 6676 { 6677 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 6678 6679 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 6680 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 6681 } 6682 6683 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 6684 { 6685 u32 reg_base, reg_offset, reg_val = 0; 6686 6687 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 6688 6689 /* Enable MMU and configure asid for all relevant ARC regions */ 6690 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 6691 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 6692 6693 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 6694 WREG32(reg_base + reg_offset, reg_val); 6695 6696 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 6697 WREG32(reg_base + reg_offset, reg_val); 6698 6699 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 6700 WREG32(reg_base + reg_offset, reg_val); 6701 6702 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 6703 WREG32(reg_base + reg_offset, reg_val); 6704 6705 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 6706 WREG32(reg_base + reg_offset, reg_val); 6707 6708 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 6709 WREG32(reg_base + reg_offset, reg_val); 6710 6711 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 6712 WREG32(reg_base + reg_offset, reg_val); 6713 6714 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 6715 WREG32(reg_base + reg_offset, reg_val); 6716 6717 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 6718 WREG32(reg_base + reg_offset, reg_val); 6719 6720 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 6721 WREG32(reg_base + reg_offset, reg_val); 6722 6723 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 6724 WREG32(reg_base + reg_offset, reg_val); 6725 } 6726 6727 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 6728 { 6729 int i; 6730 6731 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 6732 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 6733 6734 for (i = CPU_ID_SCHED_ARC0 ; i < 
NUM_OF_ARC_FARMS_ARC ; i++) 6735 gaudi2_arc_mmu_prepare(hdev, i, asid); 6736 6737 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 6738 if (!gaudi2_is_queue_enabled(hdev, i)) 6739 continue; 6740 6741 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 6742 } 6743 6744 return 0; 6745 } 6746 6747 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 6748 { 6749 struct asic_fixed_properties *prop = &hdev->asic_prop; 6750 u32 rw_asid, offset; 6751 int rc, i; 6752 6753 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 6754 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 6755 6756 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6757 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6758 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6759 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6760 6761 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6762 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6763 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6764 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6765 6766 /* ROT */ 6767 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6768 offset = i * ROT_OFFSET; 6769 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 6770 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6771 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 6772 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 6773 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 6774 } 6775 6776 /* Shared Decoders are the last bits in the decoders mask */ 6777 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 6778 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 6779 6780 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 6781 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 6782 6783 /* arc farm arc dup eng */ 6784 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 6785 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 6786 6787 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 6788 if (rc) 6789 return rc; 6790 6791 return 0; 6792 } 6793 6794 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 6795 struct iterate_module_ctx *ctx) 6796 { 6797 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 6798 6799 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 6800 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 6801 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6802 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 6803 } 6804 6805 /* zero the MMUBP and set the ASID */ 6806 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 6807 { 6808 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6809 struct gaudi2_tpc_mmu_data tpc_mmu_data; 6810 struct iterate_module_ctx tpc_iter = { 6811 .fn = &gaudi2_tpc_mmu_prepare, 6812 .data = &tpc_mmu_data, 6813 }; 6814 int rc, i; 6815 6816 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 6817 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6818 return -EINVAL; 6819 } 6820 6821 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 6822 return 0; 6823 6824 rc = gaudi2_mmu_shared_prepare(hdev, asid); 6825 if (rc) 6826 return rc; 6827 6828 /* configure DCORE MMUs */ 6829 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6830 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6831 
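/*
 * Note: the ARC_FARM_KDMA AXUSER ASID shift macros are reused here (and in
 * gaudi2_mmu_dcore_prepare) to build the combined RD/WR ASID value, on the
 * assumption that all AXUSER HB_ASID registers share the same bit layout.
 * The TPC iterator below programs this ASID into the TPC engines, and the
 * per-dcore loop then covers the remaining dcore engines (EDMA, Sync
 * Manager, MME and decoders).
 */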
gaudi2_iterate_tpcs(hdev, &tpc_iter); 6832 for (i = 0 ; i < NUM_OF_DCORES ; i++) 6833 gaudi2_mmu_dcore_prepare(hdev, i, asid); 6834 6835 return 0; 6836 } 6837 6838 static inline bool is_info_event(u32 event) 6839 { 6840 switch (event) { 6841 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 6842 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 6843 6844 /* return in case of NIC status event - these events are received periodically and are 6845 * not an indication of an error. 6846 */ 6847 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: 6848 return true; 6849 default: 6850 return false; 6851 } 6852 } 6853 6854 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, 6855 bool ratelimited, const char *fmt, ...) 6856 { 6857 struct va_format vaf; 6858 va_list args; 6859 6860 va_start(args, fmt); 6861 vaf.fmt = fmt; 6862 vaf.va = &args; 6863 6864 if (ratelimited) 6865 dev_err_ratelimited(hdev->dev, "%s: %pV\n", 6866 gaudi2_irq_map_table[event_type].valid ? 6867 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 6868 else 6869 dev_err(hdev->dev, "%s: %pV\n", 6870 gaudi2_irq_map_table[event_type].valid ? 6871 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 6872 6873 va_end(args); 6874 } 6875 6876 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, 6877 struct hl_eq_ecc_data *ecc_data) 6878 { 6879 u64 ecc_address = 0, ecc_syndrom = 0; 6880 u8 memory_wrapper_idx = 0; 6881 6882 ecc_address = le64_to_cpu(ecc_data->ecc_address); 6883 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 6884 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 6885 6886 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, 6887 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. 
critical %u.\n", 6888 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 6889 6890 return !!ecc_data->is_critical; 6891 } 6892 6893 /* 6894 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6895 * 6896 * @idx: the current pi/ci value 6897 * @q_len: the queue length (power of 2) 6898 * 6899 * @return the cyclically decremented index 6900 */ 6901 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 6902 { 6903 u32 mask = q_len - 1; 6904 6905 /* 6906 * modular decrement is equivalent to adding (queue_size -1) 6907 * later we take LSBs to make sure the value is in the 6908 * range [0, queue_len - 1] 6909 */ 6910 return (idx + q_len - 1) & mask; 6911 } 6912 6913 /** 6914 * gaudi2_print_sw_config_stream_data - print SW config stream data 6915 * 6916 * @hdev: pointer to the habanalabs device structure 6917 * @stream: the QMAN's stream 6918 * @qman_base: base address of QMAN registers block 6919 */ 6920 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 6921 u32 stream, u64 qman_base) 6922 { 6923 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6924 u32 cq_ptr_lo_off, size; 6925 6926 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 6927 6928 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 6929 stream * cq_ptr_lo_off; 6930 6931 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6932 6933 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6934 6935 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6936 size = RREG32(cq_tsize); 6937 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 6938 stream, cq_ptr, size); 6939 } 6940 6941 /** 6942 * gaudi2_print_last_pqes_on_err - print last PQEs on error 6943 * 6944 * @hdev: pointer to the habanalabs device structure 6945 * @qid_base: first QID of the QMAN (out of 4 streams) 6946 * @stream: the QMAN's stream 6947 * @qman_base: base address of QMAN registers block 6948 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6949 */ 6950 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 6951 u64 qman_base, bool pr_sw_conf) 6952 { 6953 u32 ci, qm_ci_stream_off; 6954 struct hl_hw_queue *q; 6955 u64 pq_ci; 6956 int i; 6957 6958 q = &hdev->kernel_queues[qid_base + stream]; 6959 6960 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 6961 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 6962 stream * qm_ci_stream_off; 6963 6964 hdev->asic_funcs->hw_queues_lock(hdev); 6965 6966 if (pr_sw_conf) 6967 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 6968 6969 ci = RREG32(pq_ci); 6970 6971 /* we should start printing form ci -1 */ 6972 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6973 6974 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6975 struct hl_bd *bd; 6976 u64 addr; 6977 u32 len; 6978 6979 bd = q->kernel_address; 6980 bd += ci; 6981 6982 len = le32_to_cpu(bd->len); 6983 /* len 0 means uninitialized entry- break */ 6984 if (!len) 6985 break; 6986 6987 addr = le64_to_cpu(bd->ptr); 6988 6989 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 6990 stream, ci, addr, len); 6991 6992 /* get previous ci, wrap if needed */ 6993 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6994 } 6995 6996 hdev->asic_funcs->hw_queues_unlock(hdev); 6997 } 6998 6999 /** 7000 * print_qman_data_on_err - extract QMAN data on 
error 7001 * 7002 * @hdev: pointer to the habanalabs device structure 7003 * @qid_base: first QID of the QMAN (out of 4 streams) 7004 * @stream: the QMAN's stream 7005 * @qman_base: base address of QMAN registers block 7006 * 7007 * This function attempt to extract as much data as possible on QMAN error. 7008 * On upper CP print the SW config stream data and last 8 PQEs. 7009 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 7010 */ 7011 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base) 7012 { 7013 u32 i; 7014 7015 if (stream != QMAN_STREAMS) { 7016 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true); 7017 return; 7018 } 7019 7020 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7021 7022 for (i = 0 ; i < QMAN_STREAMS ; i++) 7023 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false); 7024 } 7025 7026 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, 7027 u64 qman_base, u32 qid_base) 7028 { 7029 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; 7030 u64 glbl_sts_addr, arb_err_addr; 7031 char reg_desc[32]; 7032 7033 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE); 7034 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE); 7035 7036 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */ 7037 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7038 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7039 7040 if (!glbl_sts_val) 7041 continue; 7042 7043 if (i == QMAN_STREAMS) { 7044 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7045 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE; 7046 } else { 7047 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7048 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE; 7049 } 7050 7051 for (j = 0 ; j < num_error_causes ; j++) 7052 if (glbl_sts_val & BIT(j)) { 7053 gaudi2_print_event(hdev, event_type, true, 7054 "%s. err cause: %s", reg_desc, 7055 i == QMAN_STREAMS ? 7056 gaudi2_qman_lower_cp_error_cause[j] : 7057 gaudi2_qman_error_cause[j]); 7058 error_count++; 7059 } 7060 7061 print_qman_data_on_err(hdev, qid_base, i, qman_base); 7062 } 7063 7064 arb_err_val = RREG32(arb_err_addr); 7065 7066 if (!arb_err_val) 7067 goto out; 7068 7069 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7070 if (arb_err_val & BIT(j)) { 7071 gaudi2_print_event(hdev, event_type, true, 7072 "ARB_ERR. 
err cause: %s", 7073 gaudi2_qman_arb_error_cause[j]); 7074 error_count++; 7075 } 7076 } 7077 7078 out: 7079 return error_count; 7080 } 7081 7082 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7083 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7084 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, 7085 enum gaudi2_engine_id id, u64 *event_mask) 7086 { 7087 u32 razwi_hi, razwi_lo, razwi_xy; 7088 u16 eng_id = id; 7089 u8 rd_wr_flag; 7090 7091 if (is_write) { 7092 if (read_razwi_regs) { 7093 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7094 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7095 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7096 } else { 7097 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg); 7098 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); 7099 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); 7100 } 7101 rd_wr_flag = HL_RAZWI_WRITE; 7102 } else { 7103 if (read_razwi_regs) { 7104 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7105 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7106 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7107 } else { 7108 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg); 7109 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); 7110 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); 7111 } 7112 rd_wr_flag = HL_RAZWI_READ; 7113 } 7114 7115 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7116 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7117 7118 dev_err_ratelimited(hdev->dev, 7119 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7120 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7121 } 7122 7123 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7124 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7125 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, 7126 enum gaudi2_engine_id id, u64 *event_mask) 7127 { 7128 u32 razwi_addr, razwi_xy; 7129 u16 eng_id = id; 7130 u8 rd_wr_flag; 7131 7132 if (is_write) { 7133 if (read_razwi_regs) { 7134 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7135 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7136 } else { 7137 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg); 7138 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg); 7139 } 7140 7141 rd_wr_flag = HL_RAZWI_WRITE; 7142 } else { 7143 if (read_razwi_regs) { 7144 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7145 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7146 } else { 7147 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg); 7148 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg); 7149 } 7150 7151 rd_wr_flag = HL_RAZWI_READ; 7152 } 7153 7154 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7155 dev_err_ratelimited(hdev->dev, 7156 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", 7157 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7158 razwi_xy); 7159 } 7160 7161 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7162 enum razwi_event_sources module, u8 module_idx) 7163 { 7164 switch (module) { 7165 case RAZWI_TPC: 7166 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7167 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7168 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7169 (module_idx % NUM_OF_TPC_PER_DCORE) + 7170 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7171 7172 case RAZWI_MME: 7173 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 7174 (module_idx * ENGINE_ID_DCORE_OFFSET)); 7175 7176 case RAZWI_EDMA: 7177 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7178 (module_idx % NUM_OF_EDMA_PER_DCORE)); 7179 7180 case RAZWI_PDMA: 7181 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 7182 7183 case RAZWI_NIC: 7184 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 7185 7186 case RAZWI_DEC: 7187 if (module_idx == 8) 7188 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 7189 7190 if (module_idx == 9) 7191 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 7192 ; 7193 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7194 (module_idx % NUM_OF_DEC_PER_DCORE) + 7195 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7196 7197 case RAZWI_ROT: 7198 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 7199 7200 default: 7201 return GAUDI2_ENGINE_ID_SIZE; 7202 } 7203 } 7204 7205 /* 7206 * This function handles RR(Range register) hit events. 7207 * raised be initiators not PSOC RAZWI. 7208 */ 7209 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 7210 enum razwi_event_sources module, u8 module_idx, 7211 u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info, 7212 u64 *event_mask) 7213 { 7214 bool via_sft = false, read_razwi_regs = false; 7215 u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id; 7216 u64 rtr_mstr_if_base_addr; 7217 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 7218 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 7219 char initiator_name[64]; 7220 7221 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info) 7222 read_razwi_regs = true; 7223 7224 switch (module) { 7225 case RAZWI_TPC: 7226 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx]; 7227 sprintf(initiator_name, "TPC_%u", module_idx); 7228 break; 7229 case RAZWI_MME: 7230 sprintf(initiator_name, "MME_%u", module_idx); 7231 switch (module_sub_idx) { 7232 case MME_WAP0: 7233 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 7234 break; 7235 case MME_WAP1: 7236 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 7237 break; 7238 case MME_WRITE: 7239 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 7240 break; 7241 case MME_READ: 7242 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 7243 break; 7244 case MME_SBTE0: 7245 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 7246 break; 7247 case MME_SBTE1: 7248 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 7249 break; 7250 case MME_SBTE2: 7251 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 7252 break; 7253 case MME_SBTE3: 7254 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 7255 break; 7256 case MME_SBTE4: 7257 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 7258 break; 7259 default: 7260 return; 7261 } 7262 break; 7263 case RAZWI_EDMA: 7264 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id; 7265 dcore_id = 
gaudi2_edma_initiator_sft_id[module_idx].dcore_id; 7266 via_sft = true; 7267 sprintf(initiator_name, "EDMA_%u", module_idx); 7268 break; 7269 case RAZWI_PDMA: 7270 rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx]; 7271 sprintf(initiator_name, "PDMA_%u", module_idx); 7272 break; 7273 case RAZWI_NIC: 7274 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx]; 7275 sprintf(initiator_name, "NIC_%u", module_idx); 7276 break; 7277 case RAZWI_DEC: 7278 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx]; 7279 sprintf(initiator_name, "DEC_%u", module_idx); 7280 break; 7281 case RAZWI_ROT: 7282 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx]; 7283 sprintf(initiator_name, "ROT_%u", module_idx); 7284 break; 7285 default: 7286 return; 7287 } 7288 7289 if (!read_razwi_regs) { 7290 if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) { 7291 hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) & 7292 RAZWI_HAPPENED_AW; 7293 hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) & 7294 RAZWI_HAPPENED_AR; 7295 } else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) { 7296 lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) & 7297 RAZWI_HAPPENED_AW; 7298 lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) & 7299 RAZWI_HAPPENED_AR; 7300 } 7301 rtr_mstr_if_base_addr = 0; 7302 7303 goto dump_info; 7304 } 7305 7306 /* Find router mstr_if register base */ 7307 if (via_sft) { 7308 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + 7309 dcore_id * SFT_DCORE_OFFSET + 7310 sft_id * SFT_IF_OFFSET + 7311 RTR_MSTR_IF_OFFSET; 7312 } else { 7313 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7314 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7315 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 7316 dcore_id * DCORE_OFFSET + 7317 dcore_rtr_id * DCORE_RTR_OFFSET + 7318 RTR_MSTR_IF_OFFSET; 7319 } 7320 7321 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 7322 hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 7323 7324 hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 7325 7326 if (via_sft) { 7327 /* SFT has separate MSTR_IF for LBW, only there we can 7328 * read the LBW razwi related registers 7329 */ 7330 u64 base; 7331 7332 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET + 7333 RTR_LBW_MSTR_IF_OFFSET; 7334 7335 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7336 7337 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7338 } else { 7339 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7340 7341 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7342 } 7343 7344 dump_info: 7345 /* check if there is no RR razwi indication at all */ 7346 if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar) 7347 return; 7348 7349 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 7350 if (hbw_shrd_aw) { 7351 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7352 initiator_name, read_razwi_regs, razwi_info, 7353 eng_id, event_mask); 7354 7355 /* Clear event indication */ 7356 if (read_razwi_regs) 7357 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 7358 } 7359 7360 if (hbw_shrd_ar) { 7361 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7362 initiator_name, read_razwi_regs, razwi_info, 7363 eng_id, event_mask); 7364 7365 /* Clear event indication */ 7366 if (read_razwi_regs) 7367 WREG32(rtr_mstr_if_base_addr + 
RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 7368 } 7369 7370 if (lbw_shrd_aw) { 7371 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7372 initiator_name, read_razwi_regs, razwi_info, 7373 eng_id, event_mask); 7374 7375 /* Clear event indication */ 7376 if (read_razwi_regs) 7377 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 7378 } 7379 7380 if (lbw_shrd_ar) { 7381 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7382 initiator_name, read_razwi_regs, razwi_info, 7383 eng_id, event_mask); 7384 7385 /* Clear event indication */ 7386 if (read_razwi_regs) 7387 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar); 7388 } 7389 } 7390 7391 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) 7392 { 7393 struct asic_fixed_properties *prop = &hdev->asic_prop; 7394 u8 mod_idx, sub_mod; 7395 7396 /* check all TPCs */ 7397 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 7398 if (prop->tpc_enabled_mask & BIT(mod_idx)) 7399 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL, 7400 NULL); 7401 } 7402 7403 /* check all MMEs */ 7404 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7405 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 7406 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 7407 sub_mod, NULL, NULL); 7408 7409 /* check all EDMAs */ 7410 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7411 if (prop->edma_enabled_mask & BIT(mod_idx)) 7412 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL, 7413 NULL); 7414 7415 /* check all PDMAs */ 7416 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 7417 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL, 7418 NULL); 7419 7420 /* check all NICs */ 7421 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 7422 if (hdev->nic_ports_mask & BIT(mod_idx)) 7423 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 7424 NULL, NULL); 7425 7426 /* check all DECs */ 7427 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 7428 if (prop->decoder_enabled_mask & BIT(mod_idx)) 7429 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL, 7430 NULL); 7431 7432 /* check all ROTs */ 7433 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 7434 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL); 7435 } 7436 7437 static const char *gaudi2_get_initiators_name(u32 rtr_id) 7438 { 7439 switch (rtr_id) { 7440 case DCORE0_RTR0: 7441 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; 7442 case DCORE0_RTR1: 7443 return "TPC0/1"; 7444 case DCORE0_RTR2: 7445 return "TPC2/3"; 7446 case DCORE0_RTR3: 7447 return "TPC4/5"; 7448 case DCORE0_RTR4: 7449 return "MME0_SBTE0/1"; 7450 case DCORE0_RTR5: 7451 return "MME0_WAP0/SBTE2"; 7452 case DCORE0_RTR6: 7453 return "MME0_CTRL_WR/SBTE3"; 7454 case DCORE0_RTR7: 7455 return "MME0_WAP1/CTRL_RD/SBTE4"; 7456 case DCORE1_RTR0: 7457 return "MME1_WAP1/CTRL_RD/SBTE4"; 7458 case DCORE1_RTR1: 7459 return "MME1_CTRL_WR/SBTE3"; 7460 case DCORE1_RTR2: 7461 return "MME1_WAP0/SBTE2"; 7462 case DCORE1_RTR3: 7463 return "MME1_SBTE0/1"; 7464 case DCORE1_RTR4: 7465 return "TPC10/11"; 7466 case DCORE1_RTR5: 7467 return "TPC8/9"; 7468 case DCORE1_RTR6: 7469 return "TPC6/7"; 7470 case DCORE1_RTR7: 7471 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, 
EDMA1/3, HMMU1/3/5/7"; 7472 case DCORE2_RTR0: 7473 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; 7474 case DCORE2_RTR1: 7475 return "TPC16/17"; 7476 case DCORE2_RTR2: 7477 return "TPC14/15"; 7478 case DCORE2_RTR3: 7479 return "TPC12/13"; 7480 case DCORE2_RTR4: 7481 return "MME2_SBTE0/1"; 7482 case DCORE2_RTR5: 7483 return "MME2_WAP0/SBTE2"; 7484 case DCORE2_RTR6: 7485 return "MME2_CTRL_WR/SBTE3"; 7486 case DCORE2_RTR7: 7487 return "MME2_WAP1/CTRL_RD/SBTE4"; 7488 case DCORE3_RTR0: 7489 return "MME3_WAP1/CTRL_RD/SBTE4"; 7490 case DCORE3_RTR1: 7491 return "MME3_CTRL_WR/SBTE3"; 7492 case DCORE3_RTR2: 7493 return "MME3_WAP0/SBTE2"; 7494 case DCORE3_RTR3: 7495 return "MME3_SBTE0/1"; 7496 case DCORE3_RTR4: 7497 return "TPC18/19"; 7498 case DCORE3_RTR5: 7499 return "TPC20/21"; 7500 case DCORE3_RTR6: 7501 return "TPC22/23"; 7502 case DCORE3_RTR7: 7503 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; 7504 default: 7505 return "N/A"; 7506 } 7507 } 7508 7509 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) 7510 { 7511 switch (rtr_id) { 7512 case DCORE0_RTR0: 7513 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; 7514 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; 7515 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; 7516 engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1; 7517 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7518 engines[5] = GAUDI2_ENGINE_ID_PDMA_0; 7519 engines[6] = GAUDI2_ENGINE_ID_PDMA_1; 7520 engines[7] = GAUDI2_ENGINE_ID_PCIE; 7521 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; 7522 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; 7523 engines[10] = GAUDI2_ENGINE_ID_PSOC; 7524 return 11; 7525 7526 case DCORE0_RTR1: 7527 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; 7528 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; 7529 return 2; 7530 7531 case DCORE0_RTR2: 7532 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; 7533 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3; 7534 return 2; 7535 7536 case DCORE0_RTR3: 7537 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; 7538 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; 7539 return 2; 7540 7541 case DCORE0_RTR4: 7542 case DCORE0_RTR5: 7543 case DCORE0_RTR6: 7544 case DCORE0_RTR7: 7545 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; 7546 return 1; 7547 7548 case DCORE1_RTR0: 7549 case DCORE1_RTR1: 7550 case DCORE1_RTR2: 7551 case DCORE1_RTR3: 7552 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; 7553 return 1; 7554 7555 case DCORE1_RTR4: 7556 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; 7557 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; 7558 return 2; 7559 7560 case DCORE1_RTR5: 7561 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; 7562 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; 7563 return 2; 7564 7565 case DCORE1_RTR6: 7566 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; 7567 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; 7568 return 2; 7569 7570 case DCORE1_RTR7: 7571 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; 7572 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; 7573 engines[2] = GAUDI2_ENGINE_ID_NIC0_0; 7574 engines[3] = GAUDI2_ENGINE_ID_NIC1_0; 7575 engines[4] = GAUDI2_ENGINE_ID_NIC2_0; 7576 engines[5] = GAUDI2_ENGINE_ID_NIC3_0; 7577 engines[6] = GAUDI2_ENGINE_ID_NIC4_0; 7578 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; 7579 engines[8] = GAUDI2_ENGINE_ID_KDMA; 7580 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; 7581 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; 7582 return 11; 7583 7584 case DCORE2_RTR0: 7585 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; 7586 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1; 7587 engines[2] = GAUDI2_ENGINE_ID_NIC5_0; 7588 engines[3] = 
GAUDI2_ENGINE_ID_NIC6_0; 7589 engines[4] = GAUDI2_ENGINE_ID_NIC7_0; 7590 engines[5] = GAUDI2_ENGINE_ID_NIC8_0; 7591 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; 7592 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; 7593 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7594 return 9; 7595 7596 case DCORE2_RTR1: 7597 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; 7598 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; 7599 return 2; 7600 7601 case DCORE2_RTR2: 7602 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; 7603 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; 7604 return 2; 7605 7606 case DCORE2_RTR3: 7607 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; 7608 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; 7609 return 2; 7610 7611 case DCORE2_RTR4: 7612 case DCORE2_RTR5: 7613 case DCORE2_RTR6: 7614 case DCORE2_RTR7: 7615 engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; 7616 return 1; 7617 case DCORE3_RTR0: 7618 case DCORE3_RTR1: 7619 case DCORE3_RTR2: 7620 case DCORE3_RTR3: 7621 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; 7622 return 1; 7623 case DCORE3_RTR4: 7624 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; 7625 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; 7626 return 2; 7627 case DCORE3_RTR5: 7628 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; 7629 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; 7630 return 2; 7631 case DCORE3_RTR6: 7632 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; 7633 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; 7634 return 2; 7635 case DCORE3_RTR7: 7636 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; 7637 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; 7638 engines[2] = GAUDI2_ENGINE_ID_NIC9_0; 7639 engines[3] = GAUDI2_ENGINE_ID_NIC10_0; 7640 engines[4] = GAUDI2_ENGINE_ID_NIC11_0; 7641 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; 7642 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; 7643 engines[7] = GAUDI2_ENGINE_ID_ROT_1; 7644 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7645 return 9; 7646 default: 7647 return 0; 7648 } 7649 } 7650 7651 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7652 u64 rtr_ctrl_base_addr, bool is_write, 7653 u64 *event_mask) 7654 { 7655 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7656 u32 razwi_hi, razwi_lo; 7657 u8 rd_wr_flag; 7658 7659 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7660 7661 if (is_write) { 7662 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); 7663 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); 7664 rd_wr_flag = HL_RAZWI_WRITE; 7665 7666 /* Clear set indication */ 7667 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); 7668 } else { 7669 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); 7670 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); 7671 rd_wr_flag = HL_RAZWI_READ; 7672 7673 /* Clear set indication */ 7674 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); 7675 } 7676 7677 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, 7678 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7679 dev_err_ratelimited(hdev->dev, 7680 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", 7681 is_write ? 
"WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); 7682 7683 dev_err_ratelimited(hdev->dev, 7684 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7685 } 7686 7687 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7688 u64 rtr_ctrl_base_addr, bool is_write, 7689 u64 *event_mask) 7690 { 7691 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7692 u32 razwi_addr; 7693 u8 rd_wr_flag; 7694 7695 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7696 7697 if (is_write) { 7698 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); 7699 rd_wr_flag = HL_RAZWI_WRITE; 7700 7701 /* Clear set indication */ 7702 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); 7703 } else { 7704 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); 7705 rd_wr_flag = HL_RAZWI_READ; 7706 7707 /* Clear set indication */ 7708 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); 7709 } 7710 7711 hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, 7712 event_mask); 7713 dev_err_ratelimited(hdev->dev, 7714 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", 7715 is_write ? "WR" : "RD", rtr_id, razwi_addr); 7716 7717 dev_err_ratelimited(hdev->dev, 7718 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7719 } 7720 7721 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 7722 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 7723 { 7724 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, 7725 razwi_mask_info, razwi_intr = 0, error_count = 0; 7726 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES; 7727 u64 rtr_ctrl_base_addr; 7728 7729 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 7730 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 7731 if (!razwi_intr) 7732 return 0; 7733 } 7734 7735 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 7736 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); 7737 7738 dev_err_ratelimited(hdev->dev, 7739 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7740 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 7741 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 7742 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 7743 xy, 7744 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 7745 7746 if (xy == 0) { 7747 dev_err_ratelimited(hdev->dev, 7748 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); 7749 goto clear; 7750 } 7751 7752 /* Find router id by router coordinates */ 7753 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++) 7754 if (rtr_coordinates_to_rtr_id[rtr_id] == xy) 7755 break; 7756 7757 if (rtr_id == rtr_map_arr_len) { 7758 dev_err_ratelimited(hdev->dev, 7759 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy); 7760 goto clear; 7761 } 7762 7763 /* Find router mstr_if register base */ 7764 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7765 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7766 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET + 7767 dcore_rtr_id * DCORE_RTR_OFFSET; 7768 7769 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET); 7770 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET); 7771 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET); 7772 lbw_ar_set = 
RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); 7773 7774 if (hbw_aw_set) 7775 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7776 rtr_ctrl_base_addr, true, event_mask); 7777 7778 if (hbw_ar_set) 7779 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7780 rtr_ctrl_base_addr, false, event_mask); 7781 7782 if (lbw_aw_set) 7783 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7784 rtr_ctrl_base_addr, true, event_mask); 7785 7786 if (lbw_ar_set) 7787 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7788 rtr_ctrl_base_addr, false, event_mask); 7789 7790 error_count++; 7791 7792 clear: 7793 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 7794 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 7795 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 7796 7797 return error_count; 7798 } 7799 7800 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 7801 { 7802 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7803 7804 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 7805 7806 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 7807 if (sts_val & BIT(i)) { 7808 gaudi2_print_event(hdev, event_type, true, 7809 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 7810 sts_clr_val |= BIT(i); 7811 error_count++; 7812 } 7813 } 7814 7815 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 7816 7817 return error_count; 7818 } 7819 7820 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 7821 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 7822 { 7823 enum razwi_event_sources module; 7824 u32 error_count = 0; 7825 u64 qman_base; 7826 u8 index; 7827 7828 switch (event_type) { 7829 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 7830 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 7831 qman_base = mmDCORE0_TPC0_QM_BASE + 7832 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 7833 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 7834 module = RAZWI_TPC; 7835 break; 7836 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 7837 qman_base = mmDCORE0_TPC6_QM_BASE; 7838 module = RAZWI_TPC; 7839 break; 7840 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 7841 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 7842 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 7843 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 7844 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 7845 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 7846 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 7847 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 7848 module = RAZWI_MME; 7849 break; 7850 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 7851 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 7852 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 7853 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 7854 module = RAZWI_PDMA; 7855 break; 7856 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 7857 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 7858 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 7859 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 7860 module = RAZWI_ROT; 7861 break; 7862 default: 7863 return 0; 7864 } 7865 7866 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7867 7868 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 7869 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 7870 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 7871 error_count += _gaudi2_handle_qm_sei_err(hdev, 7872 
qman_base + NIC_QM_OFFSET, event_type); 7873 7874 /* check if RAZWI happened */ 7875 if (razwi_info) 7876 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask); 7877 7878 return error_count; 7879 } 7880 7881 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type) 7882 { 7883 u32 qid_base, error_count = 0; 7884 u64 qman_base; 7885 u8 index; 7886 7887 switch (event_type) { 7888 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 7889 index = event_type - GAUDI2_EVENT_TPC0_QM; 7890 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 7891 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7892 break; 7893 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 7894 index = event_type - GAUDI2_EVENT_TPC6_QM; 7895 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 7896 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7897 break; 7898 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM: 7899 index = event_type - GAUDI2_EVENT_TPC12_QM; 7900 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 7901 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7902 break; 7903 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM: 7904 index = event_type - GAUDI2_EVENT_TPC18_QM; 7905 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 7906 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7907 break; 7908 case GAUDI2_EVENT_TPC24_QM: 7909 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 7910 qman_base = mmDCORE0_TPC6_QM_BASE; 7911 break; 7912 case GAUDI2_EVENT_MME0_QM: 7913 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 7914 qman_base = mmDCORE0_MME_QM_BASE; 7915 break; 7916 case GAUDI2_EVENT_MME1_QM: 7917 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 7918 qman_base = mmDCORE1_MME_QM_BASE; 7919 break; 7920 case GAUDI2_EVENT_MME2_QM: 7921 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 7922 qman_base = mmDCORE2_MME_QM_BASE; 7923 break; 7924 case GAUDI2_EVENT_MME3_QM: 7925 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 7926 qman_base = mmDCORE3_MME_QM_BASE; 7927 break; 7928 case GAUDI2_EVENT_HDMA0_QM: 7929 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 7930 qman_base = mmDCORE0_EDMA0_QM_BASE; 7931 break; 7932 case GAUDI2_EVENT_HDMA1_QM: 7933 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 7934 qman_base = mmDCORE0_EDMA1_QM_BASE; 7935 break; 7936 case GAUDI2_EVENT_HDMA2_QM: 7937 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 7938 qman_base = mmDCORE1_EDMA0_QM_BASE; 7939 break; 7940 case GAUDI2_EVENT_HDMA3_QM: 7941 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 7942 qman_base = mmDCORE1_EDMA1_QM_BASE; 7943 break; 7944 case GAUDI2_EVENT_HDMA4_QM: 7945 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 7946 qman_base = mmDCORE2_EDMA0_QM_BASE; 7947 break; 7948 case GAUDI2_EVENT_HDMA5_QM: 7949 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 7950 qman_base = mmDCORE2_EDMA1_QM_BASE; 7951 break; 7952 case GAUDI2_EVENT_HDMA6_QM: 7953 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 7954 qman_base = mmDCORE3_EDMA0_QM_BASE; 7955 break; 7956 case GAUDI2_EVENT_HDMA7_QM: 7957 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 7958 qman_base = mmDCORE3_EDMA1_QM_BASE; 7959 break; 7960 case GAUDI2_EVENT_PDMA0_QM: 7961 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 7962 qman_base = mmPDMA0_QM_BASE; 7963 break; 7964 case GAUDI2_EVENT_PDMA1_QM: 7965 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 7966 qman_base = mmPDMA1_QM_BASE; 7967 break; 7968 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 7969 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 7970 qman_base = mmROT0_QM_BASE; 7971 break; 
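	/*
	 * Note: each *_QM event resolves to the queue ID of stream 0 of the
	 * corresponding QMAN (qid_base); the remaining streams are assumed to
	 * occupy the consecutive queue IDs, e.g. qid_base + 0..3 for a
	 * 4-stream QMAN.
	 */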
7972 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 7973 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 7974 qman_base = mmROT1_QM_BASE; 7975 break; 7976 default: 7977 return 0; 7978 } 7979 7980 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 7981 7982 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 7983 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) 7984 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7985 7986 return error_count; 7987 } 7988 7989 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 7990 { 7991 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7992 7993 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); 7994 7995 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 7996 if (sts_val & BIT(i)) { 7997 gaudi2_print_event(hdev, event_type, true, 7998 "err cause: %s", gaudi2_arc_sei_error_cause[i]); 7999 sts_clr_val |= BIT(i); 8000 error_count++; 8001 } 8002 } 8003 8004 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); 8005 8006 return error_count; 8007 } 8008 8009 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 8010 { 8011 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8012 8013 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 8014 8015 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 8016 if (sts_val & BIT(i)) { 8017 gaudi2_print_event(hdev, event_type, true, 8018 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 8019 sts_clr_val |= BIT(i); 8020 error_count++; 8021 } 8022 } 8023 8024 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 8025 8026 return error_count; 8027 } 8028 8029 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 8030 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8031 u64 *event_mask) 8032 { 8033 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8034 u32 error_count = 0; 8035 int i; 8036 8037 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 8038 if (intr_cause_data & BIT(i)) { 8039 gaudi2_print_event(hdev, event_type, true, 8040 "err cause: %s", guadi2_rot_error_cause[i]); 8041 error_count++; 8042 } 8043 8044 /* check if RAZWI happened */ 8045 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, 8046 &razwi_with_intr_cause->razwi_info, event_mask); 8047 8048 return error_count; 8049 } 8050 8051 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 8052 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8053 u64 *event_mask) 8054 { 8055 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8056 u32 error_count = 0; 8057 int i; 8058 8059 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 8060 if (intr_cause_data & BIT(i)) { 8061 gaudi2_print_event(hdev, event_type, true, 8062 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 8063 error_count++; 8064 } 8065 8066 /* check if RAZWI happened */ 8067 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, 8068 &razwi_with_intr_cause->razwi_info, event_mask); 8069 8070 return error_count; 8071 } 8072 8073 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8074 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8075 { 8076 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8077 int i; 8078 8079 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8080 /* DCORE DEC */ 8081 sts_addr = 
mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8082 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8083 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8084 else 8085 /* PCIE DEC */ 8086 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8087 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8088 8089 sts_val = RREG32(sts_addr); 8090 8091 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8092 if (sts_val & BIT(i)) { 8093 gaudi2_print_event(hdev, event_type, true, 8094 "err cause: %s", gaudi2_dec_error_cause[i]); 8095 sts_clr_val |= BIT(i); 8096 error_count++; 8097 } 8098 } 8099 8100 /* check if RAZWI happened */ 8101 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info, 8102 event_mask); 8103 8104 /* Write 1 clear errors */ 8105 WREG32(sts_addr, sts_clr_val); 8106 8107 return error_count; 8108 } 8109 8110 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8111 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8112 { 8113 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8114 int i; 8115 8116 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index; 8117 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index; 8118 8119 sts_val = RREG32(sts_addr); 8120 8121 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) { 8122 if (sts_val & BIT(i)) { 8123 gaudi2_print_event(hdev, event_type, true, 8124 "err cause: %s", guadi2_mme_error_cause[i]); 8125 sts_clr_val |= BIT(i); 8126 error_count++; 8127 } 8128 } 8129 8130 /* check if RAZWI happened */ 8131 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) 8132 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info, 8133 event_mask); 8134 8135 WREG32(sts_clr_addr, sts_clr_val); 8136 8137 return error_count; 8138 } 8139 8140 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, 8141 u64 intr_cause_data) 8142 { 8143 int i, error_count = 0; 8144 8145 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) 8146 if (intr_cause_data & BIT(i)) { 8147 gaudi2_print_event(hdev, event_type, true, 8148 "err cause: %s", guadi2_mme_sbte_error_cause[i]); 8149 error_count++; 8150 } 8151 8152 return error_count; 8153 } 8154 8155 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8156 struct hl_eq_razwi_info *razwi_info, u64 *event_mask) 8157 { 8158 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8159 int i; 8160 8161 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index; 8162 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index; 8163 8164 sts_val = RREG32(sts_addr); 8165 8166 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) { 8167 if (sts_val & BIT(i)) { 8168 gaudi2_print_event(hdev, event_type, true, 8169 "err cause: %s", guadi2_mme_wap_error_cause[i]); 8170 sts_clr_val |= BIT(i); 8171 error_count++; 8172 } 8173 } 8174 8175 /* check if RAZWI happened on WAP0/1 */ 8176 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info, 8177 event_mask); 8178 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info, 8179 event_mask); 8180 8181 WREG32(sts_clr_addr, sts_clr_val); 8182 8183 return error_count; 8184 } 8185 8186 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, 8187 u64 intr_cause_data) 8188 { 8189 u32 error_count = 0; 8190 int i; 8191 8192 /* If an AXI read or write error is received, an error 
is reported and 8193 * interrupt message is sent. Due to an HW errata, when reading the cause 8194 * register of the KDMA engine, the reported error is always HBW even if 8195 * the actual error caused by a LBW KDMA transaction. 8196 */ 8197 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8198 if (intr_cause_data & BIT(i)) { 8199 gaudi2_print_event(hdev, event_type, true, 8200 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]); 8201 error_count++; 8202 } 8203 8204 return error_count; 8205 } 8206 8207 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, 8208 u64 intr_cause_data) 8209 { 8210 u32 error_count = 0; 8211 int i; 8212 8213 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8214 if (intr_cause_data & BIT(i)) { 8215 gaudi2_print_event(hdev, event_type, true, 8216 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); 8217 error_count++; 8218 } 8219 8220 return error_count; 8221 } 8222 8223 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) 8224 { 8225 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 8226 8227 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 8228 if (RREG32(razwi_happened_addr)) { 8229 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, 8230 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); 8231 WREG32(razwi_happened_addr, 0x1); 8232 } 8233 8234 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 8235 if (RREG32(razwi_happened_addr)) { 8236 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, 8237 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); 8238 WREG32(razwi_happened_addr, 0x1); 8239 } 8240 8241 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 8242 if (RREG32(razwi_happened_addr)) { 8243 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, 8244 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); 8245 WREG32(razwi_happened_addr, 0x1); 8246 } 8247 8248 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 8249 if (RREG32(razwi_happened_addr)) { 8250 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, 8251 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); 8252 WREG32(razwi_happened_addr, 0x1); 8253 } 8254 } 8255 8256 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type, 8257 u64 intr_cause_data, u64 *event_mask) 8258 { 8259 u32 error_count = 0; 8260 int i; 8261 8262 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 8263 if (!(intr_cause_data & BIT_ULL(i))) 8264 continue; 8265 8266 gaudi2_print_event(hdev, event_type, true, 8267 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); 8268 error_count++; 8269 8270 switch (intr_cause_data & BIT_ULL(i)) { 8271 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: 8272 break; 8273 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: 8274 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); 8275 break; 8276 } 8277 } 8278 8279 return error_count; 8280 } 8281 8282 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type, 8283 u64 intr_cause_data) 8284 8285 { 8286 u32 error_count = 0; 8287 int i; 8288 8289 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) { 8290 if (intr_cause_data & BIT_ULL(i)) { 8291 gaudi2_print_event(hdev, event_type, true, 8292 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]); 8293 error_count++; 
8294 } 8295 } 8296 8297 return error_count; 8298 } 8299 8300 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 8301 { 8302 u32 error_count = 0; 8303 int i; 8304 8305 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8306 if (intr_cause_data & BIT_ULL(i)) { 8307 gaudi2_print_event(hdev, event_type, true, 8308 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8309 error_count++; 8310 } 8311 } 8312 8313 return error_count; 8314 } 8315 8316 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8317 u64 *event_mask) 8318 { 8319 u32 valid, val; 8320 u64 addr; 8321 8322 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8323 8324 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8325 return; 8326 8327 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8328 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8329 addr <<= 32; 8330 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8331 8332 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", 8333 is_pmmu ? "PMMU" : "HMMU", addr); 8334 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8335 8336 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); 8337 } 8338 8339 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8340 { 8341 u32 valid, val; 8342 u64 addr; 8343 8344 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8345 8346 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8347 return; 8348 8349 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8350 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8351 addr <<= 32; 8352 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8353 8354 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8355 is_pmmu ? 
"PMMU" : "HMMU", addr); 8356 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); 8357 } 8358 8359 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8360 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8361 { 8362 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8363 int i; 8364 8365 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8366 8367 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8368 if (spi_sei_cause & BIT(i)) { 8369 gaudi2_print_event(hdev, event_type, true, 8370 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8371 8372 if (i == 0) 8373 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8374 else if (i == 1) 8375 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8376 8377 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8378 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8379 8380 error_count++; 8381 } 8382 } 8383 8384 /* Clear cause */ 8385 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8386 8387 /* Clear interrupt */ 8388 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8389 8390 return error_count; 8391 } 8392 8393 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 8394 { 8395 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 8396 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 8397 int i; 8398 8399 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 8400 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 8401 8402 sei_cause_val = RREG32(sei_cause_addr); 8403 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 8404 cq_intr_val = RREG32(cq_intr_addr); 8405 8406 /* SEI interrupt */ 8407 if (sei_cause_cause) { 8408 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 8409 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 8410 sei_cause_val); 8411 8412 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 8413 if (!(sei_cause_cause & BIT(i))) 8414 continue; 8415 8416 gaudi2_print_event(hdev, event_type, true, 8417 "err cause: %s. %s: 0x%X\n", 8418 gaudi2_sm_sei_cause[i].cause_name, 8419 gaudi2_sm_sei_cause[i].log_name, 8420 sei_cause_log); 8421 error_count++; 8422 break; 8423 } 8424 8425 /* Clear SM_SEI_CAUSE */ 8426 WREG32(sei_cause_addr, 0); 8427 } 8428 8429 /* CQ interrupt */ 8430 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 8431 cq_intr_queue_index = 8432 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 8433 cq_intr_val); 8434 8435 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 8436 sm_index, cq_intr_queue_index); 8437 error_count++; 8438 8439 /* Clear CQ_INTR */ 8440 WREG32(cq_intr_addr, 0); 8441 } 8442 8443 return error_count; 8444 } 8445 8446 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8447 { 8448 bool is_pmmu = false; 8449 u32 error_count = 0; 8450 u64 mmu_base; 8451 u8 index; 8452 8453 switch (event_type) { 8454 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 8455 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; 8456 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8457 break; 8458 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 8459 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); 8460 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8461 break; 8462 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 8463 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; 8464 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8465 break; 8466 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 8467 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); 8468 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8469 break; 8470 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 8471 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; 8472 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8473 break; 8474 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 8475 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); 8476 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8477 break; 8478 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 8479 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; 8480 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8481 break; 8482 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 8483 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); 8484 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8485 break; 8486 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 8487 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8488 is_pmmu = true; 8489 mmu_base = mmPMMU_HBW_MMU_BASE; 8490 break; 8491 default: 8492 return 0; 8493 } 8494 8495 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 8496 is_pmmu, event_mask); 8497 8498 return error_count; 8499 } 8500 8501 8502 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 8503 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 8504 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 8505 { 8506 u32 addr, beat, beat_shift; 8507 bool rc = false; 8508 8509 dev_err_ratelimited(hdev->dev, 8510 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 8511 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 8512 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 8513 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 8514 8515 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 8516 dev_err_ratelimited(hdev->dev, 8517 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 8518 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 8519 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 8520 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 8521 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 8522 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 8523 8524 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 8525 for (beat = 0 ; beat < 4 ; beat++) { 8526 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8527 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 8528 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 8529 beat, 8530 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8531 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 8532 8533 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8534 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 8535 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n", 8536 beat, 8537 
						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
			rc |= true;
		}

		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
			dev_err_ratelimited(hdev->dev,
					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
					beat,
					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
					 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
					 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
			rc |= true;
		}

		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
	}

	return rc;
}

static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
		struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
				derr & 0x3, derr & 0xc);

	/* JIRA H6-3286 - the following prints may not be valid */
	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
		dev_err_ratelimited(hdev->dev,
				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
				i,
				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
	}
}

static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
	__le32 *col_cmd = ca_par_err_data->dbg_col;
	__le16 *row_cmd = ca_par_err_data->dbg_row;
	u32 i;

	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
				le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
				le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
}

/* Returns true if hard reset is needed or false otherwise */
static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
		struct hl_eq_hbm_sei_data *sei_data)
{
	bool require_hard_reset = false;
	u32 hbm_id, mc_id, cause_idx;

	/* 4 SEI events per HBM: {MC0, MC1} x {severe, non-severe} */
	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;

	cause_idx = sei_data->hdr.sei_cause;
	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
		gaudi2_print_event(hdev, event_type, true,
			"err cause: Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
		return true;
	}

	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). 
Error cause: %s\n", 8626 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 8627 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8628 hbm_mc_sei_cause[cause_idx]); 8629 8630 /* Print error-specific info */ 8631 switch (cause_idx) { 8632 case HBM_SEI_CATTRIP: 8633 require_hard_reset = true; 8634 break; 8635 8636 case HBM_SEI_CMD_PARITY_EVEN: 8637 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 8638 le32_to_cpu(sei_data->hdr.cnt)); 8639 require_hard_reset = true; 8640 break; 8641 8642 case HBM_SEI_CMD_PARITY_ODD: 8643 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 8644 le32_to_cpu(sei_data->hdr.cnt)); 8645 require_hard_reset = true; 8646 break; 8647 8648 case HBM_SEI_WRITE_DATA_PARITY_ERR: 8649 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 8650 le32_to_cpu(sei_data->hdr.cnt)); 8651 require_hard_reset = true; 8652 break; 8653 8654 case HBM_SEI_READ_ERR: 8655 /* Unlike other SEI events, read error requires further processing of the 8656 * raw data in order to determine the root cause. 8657 */ 8658 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 8659 &sei_data->read_err_info, 8660 le32_to_cpu(sei_data->hdr.cnt)); 8661 break; 8662 8663 default: 8664 break; 8665 } 8666 8667 require_hard_reset |= !!sei_data->hdr.is_critical; 8668 8669 return require_hard_reset; 8670 } 8671 8672 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 8673 u64 intr_cause_data) 8674 { 8675 if (intr_cause_data) { 8676 gaudi2_print_event(hdev, event_type, true, 8677 "temperature error cause: %#llx", intr_cause_data); 8678 return 1; 8679 } 8680 8681 return 0; 8682 } 8683 8684 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 8685 { 8686 u32 i, error_count = 0; 8687 8688 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 8689 if (intr_cause_data & hbm_mc_spi[i].mask) { 8690 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 8691 hbm_mc_spi[i].cause); 8692 error_count++; 8693 } 8694 8695 return error_count; 8696 } 8697 8698 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8699 { 8700 ktime_t zero_time = ktime_set(0, 0); 8701 8702 mutex_lock(&hdev->clk_throttling.lock); 8703 8704 switch (event_type) { 8705 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 8706 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 8707 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 8708 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 8709 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 8710 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 8711 break; 8712 8713 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 8714 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 8715 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 8716 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 8717 break; 8718 8719 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 8720 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 8721 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 8722 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 8723 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 8724 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8725 dev_info_ratelimited(hdev->dev, "Clock throttling due to 
overheating\n"); 8726 break; 8727 8728 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 8729 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 8730 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 8731 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8732 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 8733 break; 8734 8735 default: 8736 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 8737 break; 8738 } 8739 8740 mutex_unlock(&hdev->clk_throttling.lock); 8741 } 8742 8743 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 8744 struct cpucp_pkt_sync_err *sync_err) 8745 { 8746 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8747 8748 gaudi2_print_event(hdev, event_type, false, 8749 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8750 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 8751 q->pi, atomic_read(&q->ci)); 8752 } 8753 8754 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 8755 { 8756 u32 p2p_intr, msix_gw_intr, error_count = 0; 8757 8758 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 8759 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 8760 8761 if (p2p_intr) { 8762 gaudi2_print_event(hdev, event_type, true, 8763 "pcie p2p transaction terminated due to security, req_id(0x%x)\n", 8764 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 8765 8766 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 8767 error_count++; 8768 } 8769 8770 if (msix_gw_intr) { 8771 gaudi2_print_event(hdev, event_type, true, 8772 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n", 8773 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 8774 8775 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 8776 error_count++; 8777 } 8778 8779 return error_count; 8780 } 8781 8782 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 8783 struct hl_eq_pcie_drain_ind_data *drain_data) 8784 { 8785 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 8786 8787 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 8788 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 8789 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 8790 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 8791 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 8792 8793 if (cause & BIT_ULL(0)) { 8794 dev_err_ratelimited(hdev->dev, 8795 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 8796 !!lbw_rd, !!lbw_wr); 8797 error_count++; 8798 } 8799 8800 if (cause & BIT_ULL(1)) { 8801 dev_err_ratelimited(hdev->dev, 8802 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 8803 hbw_rd, hbw_wr); 8804 error_count++; 8805 } 8806 8807 return error_count; 8808 } 8809 8810 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 8811 { 8812 u32 error_count = 0; 8813 int i; 8814 8815 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 8816 if (intr_cause_data & BIT_ULL(i)) { 8817 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 8818 gaudi2_psoc_axi_drain_interrupts_cause[i]); 8819 error_count++; 8820 } 8821 } 8822 8823 return error_count; 8824 } 8825 8826 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 8827 struct cpucp_pkt_sync_err *sync_err) 8828 { 8829 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8830 8831 gaudi2_print_event(hdev, event_type, false, 8832 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8833 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, 
atomic_read(&q->ci)); 8834 } 8835 8836 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 8837 struct hl_eq_engine_arc_intr_data *data) 8838 { 8839 struct hl_engine_arc_dccm_queue_full_irq *q; 8840 u32 intr_type, engine_id; 8841 u64 payload; 8842 8843 intr_type = le32_to_cpu(data->intr_type); 8844 engine_id = le32_to_cpu(data->engine_id); 8845 payload = le64_to_cpu(data->payload); 8846 8847 switch (intr_type) { 8848 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 8849 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 8850 8851 gaudi2_print_event(hdev, event_type, true, 8852 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n", 8853 engine_id, intr_type, q->queue_index); 8854 return 1; 8855 default: 8856 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n"); 8857 return 0; 8858 } 8859 } 8860 8861 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 8862 { 8863 struct gaudi2_device *gaudi2 = hdev->asic_specific; 8864 bool reset_required = false, is_critical = false; 8865 u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0; 8866 u64 event_mask = 0; 8867 u16 event_type; 8868 8869 ctl = le32_to_cpu(eq_entry->hdr.ctl); 8870 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 8871 8872 if (event_type >= GAUDI2_EVENT_SIZE) { 8873 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 8874 event_type, GAUDI2_EVENT_SIZE - 1); 8875 return; 8876 } 8877 8878 gaudi2->events_stat[event_type]++; 8879 gaudi2->events_stat_aggregate[event_type]++; 8880 8881 switch (event_type) { 8882 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 8883 fallthrough; 8884 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 8885 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8886 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8887 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 8888 is_critical = eq_entry->ecc_data.is_critical; 8889 error_count++; 8890 break; 8891 8892 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 8893 fallthrough; 8894 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8895 fallthrough; 8896 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 8897 error_count = gaudi2_handle_qman_err(hdev, event_type); 8898 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8899 break; 8900 8901 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 8902 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8903 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 8904 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8905 break; 8906 8907 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 8908 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 8909 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8910 break; 8911 8912 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8913 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8914 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8915 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, 8916 &eq_entry->razwi_info, &event_mask); 8917 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8918 break; 8919 8920 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 8921 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 8922 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8923 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 8924 &eq_entry->razwi_with_intr_cause, &event_mask); 8925 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 8926 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8927 break; 8928 8929 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 8930 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 8931 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8932 &eq_entry->razwi_with_intr_cause, &event_mask); 8933 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 8934 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8935 break; 8936 8937 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 8938 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 8939 error_count = gaudi2_handle_dec_err(hdev, index, event_type, 8940 &eq_entry->razwi_info, &event_mask); 8941 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8942 break; 8943 8944 case GAUDI2_EVENT_TPC0_KERNEL_ERR: 8945 case GAUDI2_EVENT_TPC1_KERNEL_ERR: 8946 case GAUDI2_EVENT_TPC2_KERNEL_ERR: 8947 case GAUDI2_EVENT_TPC3_KERNEL_ERR: 8948 case GAUDI2_EVENT_TPC4_KERNEL_ERR: 8949 case GAUDI2_EVENT_TPC5_KERNEL_ERR: 8950 case GAUDI2_EVENT_TPC6_KERNEL_ERR: 8951 case GAUDI2_EVENT_TPC7_KERNEL_ERR: 8952 case GAUDI2_EVENT_TPC8_KERNEL_ERR: 8953 case GAUDI2_EVENT_TPC9_KERNEL_ERR: 8954 case GAUDI2_EVENT_TPC10_KERNEL_ERR: 8955 case GAUDI2_EVENT_TPC11_KERNEL_ERR: 8956 case GAUDI2_EVENT_TPC12_KERNEL_ERR: 8957 case GAUDI2_EVENT_TPC13_KERNEL_ERR: 8958 case GAUDI2_EVENT_TPC14_KERNEL_ERR: 8959 case GAUDI2_EVENT_TPC15_KERNEL_ERR: 8960 case GAUDI2_EVENT_TPC16_KERNEL_ERR: 8961 case GAUDI2_EVENT_TPC17_KERNEL_ERR: 8962 case GAUDI2_EVENT_TPC18_KERNEL_ERR: 8963 case GAUDI2_EVENT_TPC19_KERNEL_ERR: 8964 case GAUDI2_EVENT_TPC20_KERNEL_ERR: 8965 case GAUDI2_EVENT_TPC21_KERNEL_ERR: 8966 case GAUDI2_EVENT_TPC22_KERNEL_ERR: 8967 case GAUDI2_EVENT_TPC23_KERNEL_ERR: 8968 case GAUDI2_EVENT_TPC24_KERNEL_ERR: 8969 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 8970 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 8971 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8972 &eq_entry->razwi_with_intr_cause, &event_mask); 8973 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8974 break; 8975 8976 case GAUDI2_EVENT_DEC0_SPI: 8977 case GAUDI2_EVENT_DEC1_SPI: 8978 case GAUDI2_EVENT_DEC2_SPI: 8979 case GAUDI2_EVENT_DEC3_SPI: 8980 case GAUDI2_EVENT_DEC4_SPI: 8981 case GAUDI2_EVENT_DEC5_SPI: 8982 case GAUDI2_EVENT_DEC6_SPI: 8983 case GAUDI2_EVENT_DEC7_SPI: 8984 case GAUDI2_EVENT_DEC8_SPI: 8985 case GAUDI2_EVENT_DEC9_SPI: 8986 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 8987 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 8988 error_count = gaudi2_handle_dec_err(hdev, index, event_type, 8989 &eq_entry->razwi_info, &event_mask); 8990 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8991 break; 8992 8993 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 8994 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 8995 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 8996 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 8997 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 8998 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 8999 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 9000 error_count = gaudi2_handle_mme_err(hdev, index, event_type, 9001 &eq_entry->razwi_info, &event_mask); 9002 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); 9003 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9004 break; 9005 9006 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: 9007 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR: 9008 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR: 9009 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR: 9010 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / 9011 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 9012 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 9013 error_count = gaudi2_handle_mme_err(hdev, index, event_type, 9014 &eq_entry->razwi_info, &event_mask); 9015 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9016 break; 9017 9018 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: 9019 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: 9020 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: 9021 case 
GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: 9022 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / 9023 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 9024 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 9025 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, 9026 &eq_entry->razwi_info, &event_mask); 9027 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9028 break; 9029 9030 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 9031 case GAUDI2_EVENT_KDMA0_CORE: 9032 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 9033 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9034 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9035 break; 9036 9037 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: 9038 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 9039 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9040 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9041 break; 9042 9043 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 9044 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 9045 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 9046 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9047 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9048 break; 9049 9050 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9051 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9052 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9053 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9054 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 9055 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9056 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9057 break; 9058 9059 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: 9060 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 9061 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9062 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9063 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9064 break; 9065 9066 case GAUDI2_EVENT_PMMU_FATAL_0: 9067 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 9068 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9069 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9070 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9071 break; 9072 9073 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 9074 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 9075 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9076 break; 9077 9078 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9079 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9080 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9081 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9082 reset_required = true; 9083 } 9084 error_count++; 9085 break; 9086 9087 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9088 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9089 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9090 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9091 break; 9092 9093 case GAUDI2_EVENT_HBM0_MC0_SPI ... 
GAUDI2_EVENT_HBM5_MC1_SPI: 9094 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9095 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9096 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9097 break; 9098 9099 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9100 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9101 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9102 break; 9103 9104 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9105 error_count = gaudi2_handle_psoc_drain(hdev, 9106 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9107 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9108 break; 9109 9110 case GAUDI2_EVENT_CPU_AXI_ECC: 9111 error_count = GAUDI2_NA_EVENT_CAUSE; 9112 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9113 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9114 break; 9115 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9116 error_count = GAUDI2_NA_EVENT_CAUSE; 9117 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9118 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9119 break; 9120 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9121 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9122 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9123 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9124 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9125 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9126 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9127 break; 9128 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: 9129 error_count = GAUDI2_NA_EVENT_CAUSE; 9130 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9131 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9132 break; 9133 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9134 error_count = GAUDI2_NA_EVENT_CAUSE; 9135 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9136 break; 9137 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9138 error_count = GAUDI2_NA_EVENT_CAUSE; 9139 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9140 break; 9141 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9142 error_count = GAUDI2_NA_EVENT_CAUSE; 9143 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9144 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9145 break; 9146 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9147 error_count = GAUDI2_NA_EVENT_CAUSE; 9148 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9149 break; 9150 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9151 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9152 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9153 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9154 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9155 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9156 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9157 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9158 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9159 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9160 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9161 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9162 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9163 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9164 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9165 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9166 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9167 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9168 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9169 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9170 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9171 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9172 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9173 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9174 case GAUDI2_EVENT_TPC24_BMON_SPMU: 9175 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9176 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9177 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9178 case 
GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9179 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9180 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9181 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9182 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9183 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9184 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9185 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9186 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9187 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9188 fallthrough; 9189 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9190 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9191 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9192 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9193 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9194 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9195 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9196 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9197 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9198 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9199 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU: 9200 error_count = GAUDI2_NA_EVENT_CAUSE; 9201 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9202 break; 9203 9204 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9205 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9206 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9207 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9208 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9209 error_count = GAUDI2_NA_EVENT_CAUSE; 9210 break; 9211 9212 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9213 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9214 error_count = GAUDI2_NA_EVENT_CAUSE; 9215 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9216 break; 9217 9218 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9219 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9220 error_count = GAUDI2_NA_EVENT_CAUSE; 9221 /* Do nothing- FW will handle it */ 9222 break; 9223 9224 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9225 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9226 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9227 break; 9228 9229 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9230 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9231 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9232 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9233 break; 9234 9235 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... 
GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9236 error_count = GAUDI2_NA_EVENT_CAUSE; 9237 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9238 break; 9239 9240 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9241 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9242 le64_to_cpu(eq_entry->data[0])); 9243 error_count = GAUDI2_NA_EVENT_CAUSE; 9244 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9245 break; 9246 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9247 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9248 le64_to_cpu(eq_entry->data[0])); 9249 error_count = GAUDI2_NA_EVENT_CAUSE; 9250 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9251 break; 9252 9253 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9254 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9255 error_count = GAUDI2_NA_EVENT_CAUSE; 9256 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9257 break; 9258 9259 case GAUDI2_EVENT_ARC_DCCM_FULL: 9260 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9261 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9262 break; 9263 9264 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9265 case GAUDI2_EVENT_DEV_RESET_REQ: 9266 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9267 error_count = GAUDI2_NA_EVENT_CAUSE; 9268 is_critical = true; 9269 break; 9270 9271 default: 9272 if (gaudi2_irq_map_table[event_type].valid) { 9273 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9274 event_type); 9275 error_count = GAUDI2_NA_EVENT_CAUSE; 9276 } 9277 } 9278 9279 /* Make sure to dump an error in case no error cause was printed so far. 9280 * Note that although we have counted the errors, we use this number as 9281 * a boolean. 9282 */ 9283 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type)) 9284 gaudi2_print_event(hdev, event_type, true, "%d", event_type); 9285 else if (error_count == 0) 9286 gaudi2_print_event(hdev, event_type, true, 9287 "No error cause for H/W event %u\n", event_type); 9288 9289 if ((gaudi2_irq_map_table[event_type].reset || reset_required) && 9290 (hdev->hard_reset_on_fw_events || 9291 (hdev->asic_prop.fw_security_enabled && is_critical))) 9292 goto reset_device; 9293 9294 /* Send unmask irq only for interrupts not classified as MSG */ 9295 if (!gaudi2_irq_map_table[event_type].msg) 9296 hl_fw_unmask_irq(hdev, event_type); 9297 9298 if (event_mask) 9299 hl_notifier_event_send_all(hdev, event_mask); 9300 9301 return; 9302 9303 reset_device: 9304 if (hdev->asic_prop.fw_security_enabled && is_critical) { 9305 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW; 9306 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 9307 } else { 9308 reset_flags |= HL_DRV_RESET_DELAY; 9309 } 9310 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 9311 hl_device_cond_reset(hdev, reset_flags, event_mask); 9312 } 9313 9314 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, 9315 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, 9316 u32 hw_queue_id, u32 size, u64 addr, u32 val) 9317 { 9318 u32 ctl, pkt_size; 9319 int rc = 0; 9320 9321 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 9322 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 9323 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1); 9324 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1); 9325 9326 lin_dma_pkt->ctl = cpu_to_le32(ctl); 9327 lin_dma_pkt->src_addr = cpu_to_le64(val); 9328 lin_dma_pkt->dst_addr = cpu_to_le64(addr); 9329 lin_dma_pkt->tsize = cpu_to_le32(size); 9330 9331 pkt_size = 
sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines are enabled - skip DRAM scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);

	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
					&pkt_dma_addr, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
	 * so save only the first one to restore later.
	 * Also set the SOB address on all EDMA cores for completion.
	 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
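	 *
	 * Completion scheme: WR_COMP on every enabled EDMA points at a single
	 * sync object (comp_addr) with an "increment by 1" payload, so each
	 * completed LIN_DMA packet bumps the SOB by one; the code below then
	 * polls the SOB until it reaches dma_num (the number of packets sent).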
9382 */ 9383 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 9384 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9385 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9386 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9387 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9388 9389 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9390 continue; 9391 9392 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 9393 edma_offset, mmubp); 9394 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 9395 lower_32_bits(comp_addr)); 9396 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 9397 upper_32_bits(comp_addr)); 9398 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 9399 comp_val); 9400 gaudi2_qman_set_test_mode(hdev, 9401 edma_queues_id[dcore] + 4 * edma_idx, true); 9402 } 9403 } 9404 9405 WREG32(sob_addr, 0); 9406 9407 while (cur_addr < end_addr) { 9408 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9409 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9410 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9411 9412 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9413 continue; 9414 9415 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 9416 9417 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 9418 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 9419 pkt_dma_addr + dma_num * pkt_size, 9420 edma_queues_id[dcore] + edma_idx * 4, 9421 chunk_size, cur_addr, val); 9422 if (rc) 9423 goto end; 9424 9425 dma_num++; 9426 cur_addr += chunk_size; 9427 if (cur_addr == end_addr) 9428 break; 9429 } 9430 } 9431 } 9432 9433 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 9434 if (rc) { 9435 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 9436 goto end; 9437 } 9438 end: 9439 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9440 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9441 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9442 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9443 9444 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9445 continue; 9446 9447 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 9448 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 9449 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 9450 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 9451 gaudi2_qman_set_test_mode(hdev, 9452 edma_queues_id[dcore] + 4 * edma_idx, false); 9453 } 9454 } 9455 9456 WREG32(sob_addr, 0); 9457 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 9458 9459 return rc; 9460 } 9461 9462 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val) 9463 { 9464 int rc; 9465 struct asic_fixed_properties *prop = &hdev->asic_prop; 9466 u64 size = prop->dram_end_address - prop->dram_user_base_address; 9467 9468 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 9469 9470 if (rc) 9471 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 9472 prop->dram_user_base_address, size); 9473 return rc; 9474 } 9475 9476 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 9477 { 9478 int rc; 9479 struct asic_fixed_properties *prop = &hdev->asic_prop; 9480 u64 val = hdev->memory_scrub_val; 9481 u64 addr, size; 9482 9483 if (!hdev->memory_scrub) 9484 return 0; 9485 9486 /* scrub SRAM */ 9487 addr = 
prop->sram_user_base_address; 9488 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 9489 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 9490 addr, addr + size, val); 9491 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 9492 if (rc) { 9493 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 9494 return rc; 9495 } 9496 9497 /* scrub DRAM */ 9498 rc = gaudi2_scrub_device_dram(hdev, val); 9499 if (rc) { 9500 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 9501 return rc; 9502 } 9503 return 0; 9504 } 9505 9506 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 9507 { 9508 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 9509 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 9510 u32 val, size, offset; 9511 int dcore_id; 9512 9513 offset = hdev->asic_prop.first_available_cq[0] * 4; 9514 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 9515 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 9516 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 9517 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 9518 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 9519 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 9520 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 9521 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 9522 9523 /* memset dcore0 CQ registers */ 9524 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9525 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9526 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9527 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9528 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9529 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9530 9531 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 9532 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 9533 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 9534 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 9535 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 9536 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 9537 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 9538 9539 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9540 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9541 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9542 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9543 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9544 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9545 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9546 9547 cq_lbw_l_addr += DCORE_OFFSET; 9548 cq_lbw_h_addr += DCORE_OFFSET; 9549 cq_lbw_data_addr += DCORE_OFFSET; 9550 cq_base_l_addr += DCORE_OFFSET; 9551 cq_base_h_addr += DCORE_OFFSET; 9552 cq_size_addr += DCORE_OFFSET; 9553 } 9554 9555 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 9556 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 9557 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT; 9558 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset); 9559 9560 /* memset dcore0 monitors */ 9561 gaudi2_memset_device_lbw(hdev, addr, size, val); 9562 9563 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset; 9564 gaudi2_memset_device_lbw(hdev, addr, 
size, 0); 9565 9566 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET; 9567 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET; 9568 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0; 9569 9570 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9571 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val); 9572 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0); 9573 mon_sts_addr += DCORE_OFFSET; 9574 mon_cfg_addr += DCORE_OFFSET; 9575 } 9576 9577 offset = hdev->asic_prop.first_available_user_sob[0] * 4; 9578 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset; 9579 val = 0; 9580 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - 9581 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9582 9583 /* memset dcore0 sobs */ 9584 gaudi2_memset_device_lbw(hdev, addr, size, val); 9585 9586 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET; 9587 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0; 9588 9589 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9590 gaudi2_memset_device_lbw(hdev, addr, size, val); 9591 addr += DCORE_OFFSET; 9592 } 9593 9594 /* Flush all WREG to prevent race */ 9595 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 9596 } 9597 9598 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev) 9599 { 9600 u32 reg_base, hw_queue_id; 9601 9602 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0; 9603 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9604 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9605 continue; 9606 9607 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9608 9609 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9610 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9611 } 9612 9613 /* Flush all WREG to prevent race */ 9614 RREG32(mmPDMA0_QM_ARB_CFG_0); 9615 } 9616 9617 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev) 9618 { 9619 u32 reg_base, hw_queue_id; 9620 9621 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3; 9622 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 9623 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 9624 continue; 9625 9626 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 9627 9628 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 9629 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 9630 } 9631 9632 /* Flush all WREG to prevent race */ 9633 RREG32(mmPDMA0_QM_ARB_CFG_0); 9634 } 9635 9636 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid) 9637 { 9638 return 0; 9639 } 9640 9641 static void gaudi2_restore_phase_topology(struct hl_device *hdev) 9642 { 9643 } 9644 9645 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx, 9646 struct dup_block_ctx *cfg_ctx) 9647 { 9648 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off; 9649 u8 seq; 9650 int i; 9651 9652 for (i = 0 ; i < cfg_ctx->instances ; i++) { 9653 seq = block_idx * cfg_ctx->instances + i; 9654 9655 /* skip disabled instance */ 9656 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq))) 9657 continue; 9658 9659 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off, 9660 cfg_ctx->data); 9661 } 9662 } 9663 9664 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx, 9665 u64 mask) 9666 { 9667 int i; 9668 9669 cfg_ctx->enabled_mask = mask; 9670 9671 for (i = 0 ; i < cfg_ctx->blocks ; i++) 9672 gaudi2_init_block_instances(hdev, i, cfg_ctx); 9673 } 9674 9675 void 
gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx) 9676 { 9677 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX); 9678 } 9679 9680 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) 9681 { 9682 void *host_mem_virtual_addr; 9683 dma_addr_t host_mem_dma_addr; 9684 u64 reserved_va_base; 9685 u32 pos, size_left, size_to_dma; 9686 struct hl_ctx *ctx; 9687 int rc = 0; 9688 9689 /* Fetch the ctx */ 9690 ctx = hl_get_compute_ctx(hdev); 9691 if (!ctx) { 9692 dev_err(hdev->dev, "No ctx available\n"); 9693 return -EINVAL; 9694 } 9695 9696 /* Allocate buffers for read and for poll */ 9697 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr, 9698 GFP_KERNEL | __GFP_ZERO); 9699 if (host_mem_virtual_addr == NULL) { 9700 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n"); 9701 rc = -ENOMEM; 9702 goto put_ctx; 9703 } 9704 9705 /* Reserve VM region on asic side */ 9706 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M, 9707 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 9708 if (!reserved_va_base) { 9709 dev_err(hdev->dev, "Failed to reserve vmem on asic\n"); 9710 rc = -ENOMEM; 9711 goto free_data_buffer; 9712 } 9713 9714 /* Create mapping on asic side */ 9715 mutex_lock(&hdev->mmu_lock); 9716 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); 9717 hl_mmu_invalidate_cache_range(hdev, false, 9718 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, 9719 ctx->asid, reserved_va_base, SZ_2M); 9720 mutex_unlock(&hdev->mmu_lock); 9721 if (rc) { 9722 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); 9723 goto unreserve_va; 9724 } 9725 9726 /* Enable MMU on KDMA */ 9727 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); 9728 9729 pos = 0; 9730 size_left = size; 9731 size_to_dma = SZ_2M; 9732 9733 while (size_left > 0) { 9734 if (size_left < SZ_2M) 9735 size_to_dma = size_left; 9736 9737 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false); 9738 if (rc) 9739 break; 9740 9741 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma); 9742 9743 if (size_left <= SZ_2M) 9744 break; 9745 9746 pos += SZ_2M; 9747 addr += SZ_2M; 9748 size_left -= SZ_2M; 9749 } 9750 9751 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); 9752 9753 mutex_lock(&hdev->mmu_lock); 9754 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 9755 hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, 9756 ctx->asid, reserved_va_base, SZ_2M); 9757 mutex_unlock(&hdev->mmu_lock); 9758 unreserve_va: 9759 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); 9760 free_data_buffer: 9761 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr); 9762 put_ctx: 9763 hl_ctx_put(ctx); 9764 9765 return rc; 9766 } 9767 9768 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx) 9769 { 9770 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9771 int min_alloc_order, rc; 9772 9773 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9774 return 0; 9775 9776 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 9777 HOST_SPACE_INTERNAL_CB_SZ, 9778 &hdev->internal_cb_pool_dma_addr, 9779 GFP_KERNEL | __GFP_ZERO); 9780 9781 if (!hdev->internal_cb_pool_virt_addr) 9782 return -ENOMEM; 9783 9784 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev), 9785 gaudi2_get_wait_cb_size(hdev))); 9786 9787 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 9788 if (!hdev->internal_cb_pool) { 9789 
dev_err(hdev->dev, "Failed to create internal CB pool\n"); 9790 rc = -ENOMEM; 9791 goto free_internal_cb_pool; 9792 } 9793 9794 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr, 9795 HOST_SPACE_INTERNAL_CB_SZ, -1); 9796 if (rc) { 9797 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n"); 9798 rc = -EFAULT; 9799 goto destroy_internal_cb_pool; 9800 } 9801 9802 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 9803 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 9804 9805 if (!hdev->internal_cb_va_base) { 9806 rc = -ENOMEM; 9807 goto destroy_internal_cb_pool; 9808 } 9809 9810 mutex_lock(&hdev->mmu_lock); 9811 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 9812 HOST_SPACE_INTERNAL_CB_SZ); 9813 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 9814 mutex_unlock(&hdev->mmu_lock); 9815 9816 if (rc) 9817 goto unreserve_internal_cb_pool; 9818 9819 return 0; 9820 9821 unreserve_internal_cb_pool: 9822 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9823 destroy_internal_cb_pool: 9824 gen_pool_destroy(hdev->internal_cb_pool); 9825 free_internal_cb_pool: 9826 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9827 hdev->internal_cb_pool_dma_addr); 9828 9829 return rc; 9830 } 9831 9832 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx) 9833 { 9834 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9835 9836 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9837 return; 9838 9839 mutex_lock(&hdev->mmu_lock); 9840 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9841 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9842 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 9843 mutex_unlock(&hdev->mmu_lock); 9844 9845 gen_pool_destroy(hdev->internal_cb_pool); 9846 9847 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 9848 hdev->internal_cb_pool_dma_addr); 9849 } 9850 9851 static void gaudi2_restore_user_registers(struct hl_device *hdev) 9852 { 9853 gaudi2_restore_user_sm_registers(hdev); 9854 gaudi2_restore_user_qm_registers(hdev); 9855 } 9856 9857 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9858 { 9859 struct hl_device *hdev = ctx->hdev; 9860 struct asic_fixed_properties *prop = &hdev->asic_prop; 9861 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9862 int rc; 9863 9864 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9865 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true); 9866 if (rc) 9867 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n", 9868 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 9869 9870 return rc; 9871 } 9872 9873 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 9874 { 9875 struct hl_device *hdev = ctx->hdev; 9876 struct asic_fixed_properties *prop = &hdev->asic_prop; 9877 int rc; 9878 9879 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 9880 prop->pmmu.page_size, true); 9881 if (rc) 9882 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n", 9883 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 9884 } 9885 9886 static int gaudi2_ctx_init(struct hl_ctx *ctx) 9887 { 9888 int rc; 9889 9890 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); 9891 if (rc) 9892 return 
rc; 9893 9894 /* No need to clear user registers if the device has just 9895 * performed a reset; in that case we restore only the NIC QM registers 9896 */ 9897 if (ctx->hdev->reset_upon_device_release) 9898 gaudi2_restore_nic_qm_registers(ctx->hdev); 9899 else 9900 gaudi2_restore_user_registers(ctx->hdev); 9901 9902 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx); 9903 if (rc) 9904 return rc; 9905 9906 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx); 9907 if (rc) 9908 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 9909 9910 return rc; 9911 } 9912 9913 static void gaudi2_ctx_fini(struct hl_ctx *ctx) 9914 { 9915 if (ctx->asid == HL_KERNEL_ASID_ID) 9916 return; 9917 9918 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 9919 9920 gaudi2_unmap_virtual_msix_doorbell_memory(ctx); 9921 } 9922 9923 static int gaudi2_pre_schedule_cs(struct hl_cs *cs) 9924 { 9925 struct hl_device *hdev = cs->ctx->hdev; 9926 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1); 9927 u32 mon_payload, sob_id, mon_id; 9928 9929 if (!cs_needs_completion(cs)) 9930 return 0; 9931 9932 /* 9933 * First 64 SOB/MON pairs are reserved for the driver's QMAN auto completion 9934 * mechanism. Each SOB/MON pair is used for a pending CS with the same 9935 * cyclic index. The SOB value is increased when each of the CS jobs is 9936 * completed. When the SOB reaches the number of CS jobs, the monitor 9937 * generates an MSI-X interrupt. 9938 */ 9939 9940 sob_id = mon_id = index; 9941 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | 9942 (1 << CQ_ENTRY_READY_SHIFT) | index; 9943 9944 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload, 9945 cs->jobs_cnt); 9946 9947 return 0; 9948 } 9949 9950 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 9951 { 9952 return HL_INVALID_QUEUE; 9953 } 9954 9955 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) 9956 { 9957 struct hl_cb *cb = data; 9958 struct packet_msg_short *pkt; 9959 u32 value, ctl, pkt_size = sizeof(*pkt); 9960 9961 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size); 9962 memset(pkt, 0, pkt_size); 9963 9964 /* Inc by 1, Mode ADD */ 9965 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 9966 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 9967 9968 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 9969 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */ 9970 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 9971 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb); 9972 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 9973 9974 pkt->value = cpu_to_le32(value); 9975 pkt->ctl = cpu_to_le32(ctl); 9976 9977 return size + pkt_size; 9978 } 9979 9980 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr) 9981 { 9982 u32 ctl, pkt_size = sizeof(*pkt); 9983 9984 memset(pkt, 0, pkt_size); 9985 9986 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 9987 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 9988 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 9989 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 9990 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0); 9991 9992 pkt->value = cpu_to_le32(value); 9993 pkt->ctl = cpu_to_le32(ctl); 9994 9995 return pkt_size; 9996 } 9997 9998 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt, 9999 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr) 10000 { 10001 u32 ctl, value,
pkt_size = sizeof(*pkt); 10002 u8 mask; 10003 10004 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 10005 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask); 10006 return 0; 10007 } 10008 10009 memset(pkt, 0, pkt_size); 10010 10011 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 10012 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 10013 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/ 10014 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask); 10015 10016 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10017 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10018 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10019 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10020 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10021 10022 pkt->value = cpu_to_le32(value); 10023 pkt->ctl = cpu_to_le32(ctl); 10024 10025 return pkt_size; 10026 } 10027 10028 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) 10029 { 10030 u32 ctl, cfg, pkt_size = sizeof(*pkt); 10031 10032 memset(pkt, 0, pkt_size); 10033 10034 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 10035 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 10036 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2); 10037 10038 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 10039 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10040 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10041 10042 pkt->cfg = cpu_to_le32(cfg); 10043 pkt->ctl = cpu_to_le32(ctl); 10044 10045 return pkt_size; 10046 } 10047 10048 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) 10049 { 10050 struct hl_cb *cb = prop->data; 10051 void *buf = (void *) (uintptr_t) (cb->kernel_address); 10052 10053 u64 monitor_base, fence_addr = 0; 10054 u32 stream_index, size = prop->size; 10055 u16 msg_addr_offset; 10056 10057 stream_index = prop->q_idx % 4; 10058 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] + 10059 QM_FENCE2_OFFSET + stream_index * 4; 10060 10061 /* 10062 * monitor_base should be the content of the base0 address registers, 10063 * so it will be added to the msg short offsets 10064 */ 10065 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 10066 10067 /* First monitor config packet: low address of the sync */ 10068 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) - 10069 monitor_base; 10070 10071 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset); 10072 10073 /* Second monitor config packet: high address of the sync */ 10074 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) - 10075 monitor_base; 10076 10077 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset); 10078 10079 /* 10080 * Third monitor config packet: the payload, i.e. 
what to write when the 10081 * sync triggers 10082 */ 10083 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) - 10084 monitor_base; 10085 10086 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset); 10087 10088 /* Fourth monitor config packet: bind the monitor to a sync object */ 10089 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base; 10090 10091 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask, 10092 prop->sob_val, msg_addr_offset); 10093 10094 /* Fence packet */ 10095 size += gaudi2_add_fence_pkt(buf + size); 10096 10097 return size; 10098 } 10099 10100 static void gaudi2_reset_sob(struct hl_device *hdev, void *data) 10101 { 10102 struct hl_hw_sob *hw_sob = data; 10103 10104 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); 10105 10106 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0); 10107 10108 kref_init(&hw_sob->kref); 10109 } 10110 10111 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group) 10112 { 10113 } 10114 10115 static u64 gaudi2_get_device_time(struct hl_device *hdev) 10116 { 10117 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; 10118 10119 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); 10120 } 10121 10122 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs) 10123 { 10124 return 0; 10125 } 10126 10127 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx, 10128 struct hl_cs *cs, u32 wait_queue_id, 10129 u32 collective_engine_id, u32 encaps_signal_offset) 10130 { 10131 return -EINVAL; 10132 } 10133 10134 /* 10135 * gaudi2_mmu_scramble_addr - converts a DRAM (non-power-of-2) page-size aligned address 10136 * to a DMMU page-size (64MB) aligned address before mapping it in 10137 * the MMU. 10138 * The operation is performed on both the virtual and physical addresses.
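 * The divisor used below is num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
 * the quotient is repacked into 64MB-aligned units via GAUDI2_HBM_MMU_SCRM_DIV_SHIFT
 * while the remainder keeps its offset inside the unit (GAUDI2_HBM_MMU_SCRM_MOD_SHIFT).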
10139 * for device with 6 HBMs the scramble is: 10140 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48] 10141 * 10142 * Example: 10143 * ============================================================================= 10144 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA 10145 * Phys address in MMU last 10146 * HOP 10147 * ============================================================================= 10148 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1 10149 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3 10150 * ============================================================================= 10151 */ 10152 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr) 10153 { 10154 struct asic_fixed_properties *prop = &hdev->asic_prop; 10155 u32 divisor, mod_va; 10156 u64 div_va; 10157 10158 /* accept any address in the DRAM address space */ 10159 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE, 10160 VA_HBM_SPACE_END)) { 10161 10162 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10163 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va); 10164 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) | 10165 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) | 10166 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT); 10167 } 10168 10169 return raw_addr; 10170 } 10171 10172 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr) 10173 { 10174 struct asic_fixed_properties *prop = &hdev->asic_prop; 10175 u32 divisor, mod_va; 10176 u64 div_va; 10177 10178 /* accept any address in the DRAM address space */ 10179 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE, 10180 VA_HBM_SPACE_END)) { 10181 10182 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10183 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, 10184 PAGE_SIZE_64MB, &mod_va); 10185 10186 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) + 10187 (div_va * divisor + mod_va)); 10188 } 10189 10190 return scrambled_addr; 10191 } 10192 10193 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id) 10194 { 10195 u32 base = 0, dcore_id, dec_id; 10196 10197 if (core_id >= NUMBER_OF_DEC) { 10198 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id); 10199 goto out; 10200 } 10201 10202 if (core_id < 8) { 10203 dcore_id = core_id / NUM_OF_DEC_PER_DCORE; 10204 dec_id = core_id % NUM_OF_DEC_PER_DCORE; 10205 10206 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET + 10207 dec_id * DCORE_VDEC_OFFSET; 10208 } else { 10209 /* PCIe Shared Decoder */ 10210 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET); 10211 } 10212 out: 10213 return base; 10214 } 10215 10216 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 10217 u32 *block_size, u32 *block_id) 10218 { 10219 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10220 int i; 10221 10222 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) { 10223 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) { 10224 *block_id = i; 10225 if (block_size) 10226 *block_size = gaudi2->mapped_blocks[i].size; 10227 return 0; 10228 } 10229 } 10230 10231 dev_err(hdev->dev, "Invalid block address %#llx", block_addr); 10232 10233 return -EINVAL; 10234 } 10235 10236 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 10237 u32 block_id, u32 block_size) 10238 { 10239 
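	/*
	 * Map one of the user-mappable HW blocks into the caller's VMA. Only a
	 * whole-block mapping is accepted (the size is validated below) and the
	 * PFN is derived from the block's offset inside the SRAM/CFG BAR.
	 */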
struct gaudi2_device *gaudi2 = hdev->asic_specific; 10240 u64 offset_in_bar; 10241 u64 address; 10242 int rc; 10243 10244 if (block_id >= NUM_USER_MAPPED_BLOCKS) { 10245 dev_err(hdev->dev, "Invalid block id %u", block_id); 10246 return -EINVAL; 10247 } 10248 10249 /* we allow mapping only an entire block */ 10250 if (block_size != gaudi2->mapped_blocks[block_id].size) { 10251 dev_err(hdev->dev, "Invalid block size %u", block_size); 10252 return -EINVAL; 10253 } 10254 10255 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR; 10256 10257 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar; 10258 10259 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 10260 VM_DONTCOPY | VM_NORESERVE; 10261 10262 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, 10263 block_size, vma->vm_page_prot); 10264 if (rc) 10265 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 10266 10267 return rc; 10268 } 10269 10270 static void gaudi2_enable_events_from_fw(struct hl_device *hdev) 10271 { 10272 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10273 10274 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 10275 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq); 10276 10277 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 10278 WREG32(irq_handler_offset, 10279 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id); 10280 } 10281 10282 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base) 10283 { 10284 switch (mmu_id) { 10285 case HW_CAP_DCORE0_DMMU0: 10286 *mmu_base = mmDCORE0_HMMU0_MMU_BASE; 10287 break; 10288 case HW_CAP_DCORE0_DMMU1: 10289 *mmu_base = mmDCORE0_HMMU1_MMU_BASE; 10290 break; 10291 case HW_CAP_DCORE0_DMMU2: 10292 *mmu_base = mmDCORE0_HMMU2_MMU_BASE; 10293 break; 10294 case HW_CAP_DCORE0_DMMU3: 10295 *mmu_base = mmDCORE0_HMMU3_MMU_BASE; 10296 break; 10297 case HW_CAP_DCORE1_DMMU0: 10298 *mmu_base = mmDCORE1_HMMU0_MMU_BASE; 10299 break; 10300 case HW_CAP_DCORE1_DMMU1: 10301 *mmu_base = mmDCORE1_HMMU1_MMU_BASE; 10302 break; 10303 case HW_CAP_DCORE1_DMMU2: 10304 *mmu_base = mmDCORE1_HMMU2_MMU_BASE; 10305 break; 10306 case HW_CAP_DCORE1_DMMU3: 10307 *mmu_base = mmDCORE1_HMMU3_MMU_BASE; 10308 break; 10309 case HW_CAP_DCORE2_DMMU0: 10310 *mmu_base = mmDCORE2_HMMU0_MMU_BASE; 10311 break; 10312 case HW_CAP_DCORE2_DMMU1: 10313 *mmu_base = mmDCORE2_HMMU1_MMU_BASE; 10314 break; 10315 case HW_CAP_DCORE2_DMMU2: 10316 *mmu_base = mmDCORE2_HMMU2_MMU_BASE; 10317 break; 10318 case HW_CAP_DCORE2_DMMU3: 10319 *mmu_base = mmDCORE2_HMMU3_MMU_BASE; 10320 break; 10321 case HW_CAP_DCORE3_DMMU0: 10322 *mmu_base = mmDCORE3_HMMU0_MMU_BASE; 10323 break; 10324 case HW_CAP_DCORE3_DMMU1: 10325 *mmu_base = mmDCORE3_HMMU1_MMU_BASE; 10326 break; 10327 case HW_CAP_DCORE3_DMMU2: 10328 *mmu_base = mmDCORE3_HMMU2_MMU_BASE; 10329 break; 10330 case HW_CAP_DCORE3_DMMU3: 10331 *mmu_base = mmDCORE3_HMMU3_MMU_BASE; 10332 break; 10333 case HW_CAP_PMMU: 10334 *mmu_base = mmPMMU_HBW_MMU_BASE; 10335 break; 10336 default: 10337 return -EINVAL; 10338 } 10339 10340 return 0; 10341 } 10342 10343 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) 10344 { 10345 bool is_pmmu = (mmu_id == HW_CAP_PMMU); 10346 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10347 u32 mmu_base; 10348 10349 if (!(gaudi2->hw_cap_initialized & mmu_id)) 10350 return; 10351 10352 if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base)) 10353 return; 10354 10355 
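	/* Report both error types this MMU can latch: a translation page fault and an access error */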
gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL); 10356 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 10357 } 10358 10359 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask) 10360 { 10361 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES; 10362 10363 /* check all HMMUs */ 10364 for (i = 0 ; i < num_of_hmmus ; i++) { 10365 mmu_id = HW_CAP_DCORE0_DMMU0 << i; 10366 10367 if (mmu_cap_mask & mmu_id) 10368 gaudi2_ack_mmu_error(hdev, mmu_id); 10369 } 10370 10371 /* check PMMU */ 10372 if (mmu_cap_mask & HW_CAP_PMMU) 10373 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU); 10374 10375 return 0; 10376 } 10377 10378 static void gaudi2_get_msi_info(__le32 *table) 10379 { 10380 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); 10381 } 10382 10383 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) 10384 { 10385 switch (pll_idx) { 10386 case HL_GAUDI2_CPU_PLL: return CPU_PLL; 10387 case HL_GAUDI2_PCI_PLL: return PCI_PLL; 10388 case HL_GAUDI2_NIC_PLL: return NIC_PLL; 10389 case HL_GAUDI2_DMA_PLL: return DMA_PLL; 10390 case HL_GAUDI2_MESH_PLL: return MESH_PLL; 10391 case HL_GAUDI2_MME_PLL: return MME_PLL; 10392 case HL_GAUDI2_TPC_PLL: return TPC_PLL; 10393 case HL_GAUDI2_IF_PLL: return IF_PLL; 10394 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL; 10395 case HL_GAUDI2_HBM_PLL: return HBM_PLL; 10396 case HL_GAUDI2_VID_PLL: return VID_PLL; 10397 case HL_GAUDI2_MSS_PLL: return MSS_PLL; 10398 default: return -EINVAL; 10399 } 10400 } 10401 10402 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) 10403 { 10404 /* Not implemented */ 10405 return 0; 10406 } 10407 10408 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon) 10409 { 10410 /* Not implemented */ 10411 return 0; 10412 } 10413 10414 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset, 10415 struct hl_device *hdev, struct hl_mon_state_dump *mon) 10416 { 10417 /* Not implemented */ 10418 return 0; 10419 } 10420 10421 10422 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset, 10423 u64 status_base_offset, enum hl_sync_engine_type engine_type, 10424 u32 engine_id, char **buf, size_t *size, size_t *offset) 10425 { 10426 /* Not implemented */ 10427 return 0; 10428 } 10429 10430 10431 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = { 10432 .monitor_valid = gaudi2_monitor_valid, 10433 .print_single_monitor = gaudi2_print_single_monitor, 10434 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map, 10435 .print_fences_single_engine = gaudi2_print_fences_single_engine, 10436 }; 10437 10438 static void gaudi2_state_dump_init(struct hl_device *hdev) 10439 { 10440 /* Not implemented */ 10441 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props; 10442 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs; 10443 } 10444 10445 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id) 10446 { 10447 return 0; 10448 } 10449 10450 static u32 *gaudi2_get_stream_master_qid_arr(void) 10451 { 10452 return NULL; 10453 } 10454 10455 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, 10456 struct attribute_group *dev_vrm_attr_grp) 10457 { 10458 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); 10459 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp); 10460 } 10461 10462 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, 10463 u32 page_size, 
u32 *real_page_size, bool is_dram_addr) 10464 { 10465 struct asic_fixed_properties *prop = &hdev->asic_prop; 10466 10467 /* for host pages the page size must be a multiple of the MMU page size */ 10468 if (!is_dram_addr) { 10469 if (page_size % mmu_prop->page_size) 10470 goto page_size_err; 10471 10472 *real_page_size = mmu_prop->page_size; 10473 return 0; 10474 } 10475 10476 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size)) 10477 goto page_size_err; 10478 10479 /* 10480 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater 10481 * than DRAM page size). 10482 * For this reason, work with the DRAM page size and let the MMU scrambling routine handle 10483 * this mismatch when calculating the address to place in the MMU page table. 10484 * (in that case also make sure that the dram_page_size is not greater than the 10485 * mmu page size) 10486 */ 10487 *real_page_size = prop->dram_page_size; 10488 10489 return 0; 10490 10491 page_size_err: 10492 dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", 10493 page_size, mmu_prop->page_size >> 10); 10494 return -EFAULT; 10495 } 10496 10497 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data) 10498 { 10499 return -EOPNOTSUPP; 10500 } 10501 10502 int gaudi2_send_device_activity(struct hl_device *hdev, bool open) 10503 { 10504 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10505 10506 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 10507 return 0; 10508 10509 return hl_fw_send_device_activity(hdev, open); 10510 } 10511 10512 static const struct hl_asic_funcs gaudi2_funcs = { 10513 .early_init = gaudi2_early_init, 10514 .early_fini = gaudi2_early_fini, 10515 .late_init = gaudi2_late_init, 10516 .late_fini = gaudi2_late_fini, 10517 .sw_init = gaudi2_sw_init, 10518 .sw_fini = gaudi2_sw_fini, 10519 .hw_init = gaudi2_hw_init, 10520 .hw_fini = gaudi2_hw_fini, 10521 .halt_engines = gaudi2_halt_engines, 10522 .suspend = gaudi2_suspend, 10523 .resume = gaudi2_resume, 10524 .mmap = gaudi2_mmap, 10525 .ring_doorbell = gaudi2_ring_doorbell, 10526 .pqe_write = gaudi2_pqe_write, 10527 .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent, 10528 .asic_dma_free_coherent = gaudi2_dma_free_coherent, 10529 .scrub_device_mem = gaudi2_scrub_device_mem, 10530 .scrub_device_dram = gaudi2_scrub_device_dram, 10531 .get_int_queue_base = NULL, 10532 .test_queues = gaudi2_test_queues, 10533 .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc, 10534 .asic_dma_pool_free = gaudi2_dma_pool_free, 10535 .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc, 10536 .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free, 10537 .asic_dma_unmap_single = gaudi2_dma_unmap_single, 10538 .asic_dma_map_single = gaudi2_dma_map_single, 10539 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, 10540 .cs_parser = gaudi2_cs_parser, 10541 .asic_dma_map_sgtable = hl_dma_map_sgtable, 10542 .add_end_of_cb_packets = NULL, 10543 .update_eq_ci = gaudi2_update_eq_ci, 10544 .context_switch = gaudi2_context_switch, 10545 .restore_phase_topology = gaudi2_restore_phase_topology, 10546 .debugfs_read_dma = gaudi2_debugfs_read_dma, 10547 .add_device_attr = gaudi2_add_device_attr, 10548 .handle_eqe = gaudi2_handle_eqe, 10549 .get_events_stat = gaudi2_get_events_stat, 10550 .read_pte = NULL, 10551 .write_pte = NULL, 10552 .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache, 10553 .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range, 10554 .mmu_prefetch_cache_range = NULL, 10555 .send_heartbeat =
gaudi2_send_heartbeat, 10556 .debug_coresight = gaudi2_debug_coresight, 10557 .is_device_idle = gaudi2_is_device_idle, 10558 .compute_reset_late_init = gaudi2_compute_reset_late_init, 10559 .hw_queues_lock = gaudi2_hw_queues_lock, 10560 .hw_queues_unlock = gaudi2_hw_queues_unlock, 10561 .get_pci_id = gaudi2_get_pci_id, 10562 .get_eeprom_data = gaudi2_get_eeprom_data, 10563 .get_monitor_dump = gaudi2_get_monitor_dump, 10564 .send_cpu_message = gaudi2_send_cpu_message, 10565 .pci_bars_map = gaudi2_pci_bars_map, 10566 .init_iatu = gaudi2_init_iatu, 10567 .rreg = hl_rreg, 10568 .wreg = hl_wreg, 10569 .halt_coresight = gaudi2_halt_coresight, 10570 .ctx_init = gaudi2_ctx_init, 10571 .ctx_fini = gaudi2_ctx_fini, 10572 .pre_schedule_cs = gaudi2_pre_schedule_cs, 10573 .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq, 10574 .load_firmware_to_device = NULL, 10575 .load_boot_fit_to_device = NULL, 10576 .get_signal_cb_size = gaudi2_get_signal_cb_size, 10577 .get_wait_cb_size = gaudi2_get_wait_cb_size, 10578 .gen_signal_cb = gaudi2_gen_signal_cb, 10579 .gen_wait_cb = gaudi2_gen_wait_cb, 10580 .reset_sob = gaudi2_reset_sob, 10581 .reset_sob_group = gaudi2_reset_sob_group, 10582 .get_device_time = gaudi2_get_device_time, 10583 .pb_print_security_errors = gaudi2_pb_print_security_errors, 10584 .collective_wait_init_cs = gaudi2_collective_wait_init_cs, 10585 .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs, 10586 .get_dec_base_addr = gaudi2_get_dec_base_addr, 10587 .scramble_addr = gaudi2_mmu_scramble_addr, 10588 .descramble_addr = gaudi2_mmu_descramble_addr, 10589 .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors, 10590 .get_hw_block_id = gaudi2_get_hw_block_id, 10591 .hw_block_mmap = gaudi2_block_mmap, 10592 .enable_events_from_fw = gaudi2_enable_events_from_fw, 10593 .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error, 10594 .get_msi_info = gaudi2_get_msi_info, 10595 .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx, 10596 .init_firmware_preload_params = gaudi2_init_firmware_preload_params, 10597 .init_firmware_loader = gaudi2_init_firmware_loader, 10598 .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm, 10599 .state_dump_init = gaudi2_state_dump_init, 10600 .get_sob_addr = &gaudi2_get_sob_addr, 10601 .set_pci_memory_regions = gaudi2_set_pci_memory_regions, 10602 .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr, 10603 .check_if_razwi_happened = gaudi2_check_if_razwi_happened, 10604 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, 10605 .access_dev_mem = hl_access_dev_mem, 10606 .set_dram_bar_base = gaudi2_set_hbm_bar_base, 10607 .set_engine_cores = gaudi2_set_engine_cores, 10608 .send_device_activity = gaudi2_send_device_activity, 10609 .set_dram_properties = gaudi2_set_dram_properties, 10610 .set_binning_masks = gaudi2_set_binning_masks, 10611 }; 10612 10613 void gaudi2_set_asic_funcs(struct hl_device *hdev) 10614 { 10615 hdev->asic_funcs = &gaudi2_funcs; 10616 } 10617
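/*
 * Usage sketch (illustrative only, not part of the driver): it is assumed that
 * the common habanalabs core, after binding a Gaudi2 device, drives all
 * ASIC-specific operations through the table installed above, e.g.:
 *
 *	gaudi2_set_asic_funcs(hdev);
 *	rc = hdev->asic_funcs->early_init(hdev);
 *
 * with the remaining callbacks (hw_init, scrub_device_mem, ctx_init, ...)
 * invoked the same way at the appropriate stages of the device life-cycle.
 */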