1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2020-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include "gaudi2P.h" 9 #include "gaudi2_masks.h" 10 #include "../include/hw_ip/mmu/mmu_general.h" 11 #include "../include/hw_ip/mmu/mmu_v2_0.h" 12 #include "../include/gaudi2/gaudi2_packets.h" 13 #include "../include/gaudi2/gaudi2_reg_map.h" 14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h" 15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h" 16 17 #include <linux/module.h> 18 #include <linux/pci.h> 19 #include <linux/hwmon.h> 20 #include <linux/iommu.h> 21 22 #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ 23 24 #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ 25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ 26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ 27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ 28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */ 29 #define GAUDI2_RESET_POLL_CNT 3 30 #define GAUDI2_RESET_WAIT_MSEC 1 /* 1ms */ 31 #define GAUDI2_CPU_RESET_WAIT_MSEC 100 /* 100ms */ 32 #define GAUDI2_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ 33 #define GAUDI2_CB_POOL_CB_CNT 512 34 #define GAUDI2_CB_POOL_CB_SIZE SZ_128K /* 128KB */ 35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC 25000000 /* 25s */ 37 #define GAUDI2_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ 38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ 39 40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT 3 41 42 /* 43 * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs 44 * and the code relies on that value (for array size etc..) we define another value 45 * for MAX faulty TPCs which reflects the cluster binning requirements 46 */ 47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS 1 48 #define MAX_FAULTY_XBARS 1 49 #define MAX_FAULTY_EDMAS 1 50 #define MAX_FAULTY_DECODERS 1 51 52 #define GAUDI2_TPC_FULL_MASK 0x1FFFFFF 53 #define GAUDI2_HIF_HMMU_FULL_MASK 0xFFFF 54 #define GAUDI2_DECODER_FULL_MASK 0x3FF 55 56 #define GAUDI2_NA_EVENT_CAUSE 0xFF 57 #define GAUDI2_NUM_OF_QM_ERR_CAUSE 18 58 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE 25 59 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3 60 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14 61 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3 62 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2 63 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22 64 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30 65 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 66 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 67 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 68 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7 69 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8 70 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19 71 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE 9 72 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE 3 73 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE 3 74 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE 2 75 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE 2 76 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE 2 77 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE 5 78 79 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 10) 80 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 200) 81 #define GAUDI2_ARB_WDT_TIMEOUT (0x1000000) 82 83 #define GAUDI2_VDEC_TIMEOUT_USEC 10000 /* 10ms */ 84 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC (GAUDI2_VDEC_TIMEOUT_USEC * 100) 85 86 #define KDMA_TIMEOUT_USEC USEC_PER_SEC 87 88 #define IS_DMA_IDLE(dma_core_idle_ind_mask) \ 89 (!((dma_core_idle_ind_mask) & \ 90 
((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \ 91 (DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK)))) 92 93 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) 94 95 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK)) 96 97 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \ 98 ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \ 99 (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \ 100 (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK))) 101 102 #define PCIE_DEC_EN_MASK 0x300 103 #define DEC_WORK_STATE_IDLE 0 104 #define DEC_WORK_STATE_PEND 3 105 #define IS_DEC_IDLE(dec_swreg15) \ 106 (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \ 107 ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND) 108 109 /* HBM MMU address scrambling parameters */ 110 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE SZ_8M 111 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT 26 112 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT 0 113 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK DRAM_VA_HINT_MASK 114 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR 16 115 #define MMU_RANGE_INV_VA_LSB_SHIFT 12 116 #define MMU_RANGE_INV_VA_MSB_SHIFT 44 117 #define MMU_RANGE_INV_EN_SHIFT 0 118 #define MMU_RANGE_INV_ASID_EN_SHIFT 1 119 #define MMU_RANGE_INV_ASID_SHIFT 2 120 121 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has 122 * a 2 entries FIFO, and hence it is not enabled for it. 123 */ 124 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0) 125 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0) 126 127 #define GAUDI2_MAX_STRING_LEN 64 128 129 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \ 130 GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1) 131 132 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) 133 134 enum hl_pmmu_fatal_cause { 135 LATENCY_RD_OUT_FIFO_OVERRUN, 136 LATENCY_WR_OUT_FIFO_OVERRUN, 137 }; 138 139 enum hl_pcie_drain_ind_cause { 140 LBW_AXI_DRAIN_IND, 141 HBW_AXI_DRAIN_IND 142 }; 143 144 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = { 145 [HBM_ID0] = 0xFFFC, 146 [HBM_ID1] = 0xFFCF, 147 [HBM_ID2] = 0xF7F7, 148 [HBM_ID3] = 0x7F7F, 149 [HBM_ID4] = 0xFCFF, 150 [HBM_ID5] = 0xCFFF, 151 }; 152 153 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = { 154 [0] = HBM_ID0, 155 [1] = HBM_ID1, 156 [2] = HBM_ID4, 157 [3] = HBM_ID5, 158 }; 159 160 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = { 161 [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0, 162 [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2, 163 [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1, 164 [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3, 165 [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2, 166 [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4, 167 [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3, 168 [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5, 169 }; 170 171 static const int gaudi2_qman_async_event_id[] = { 172 [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM, 173 [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM, 174 [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM, 175 [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM, 176 [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM, 177 [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM, 178 [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM, 179 [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM, 180 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = 
GAUDI2_EVENT_HDMA0_QM, 181 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM, 182 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM, 183 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM, 184 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM, 185 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM, 186 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM, 187 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM, 188 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM, 189 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM, 190 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM, 191 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM, 192 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM, 193 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM, 194 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM, 195 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM, 196 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM, 197 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM, 198 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM, 199 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM, 200 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM, 201 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM, 202 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM, 203 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM, 204 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM, 205 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM, 206 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM, 207 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM, 208 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM, 209 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM, 210 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM, 211 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM, 212 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM, 213 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM, 214 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM, 215 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM, 216 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM, 217 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM, 218 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM, 219 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM, 220 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM, 221 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM, 222 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM, 223 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM, 224 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM, 225 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM, 226 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM, 227 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM, 228 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM, 229 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM, 230 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM, 231 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM, 232 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM, 233 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM, 234 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM, 235 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM, 236 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM, 237 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM, 238 
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM, 239 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM, 240 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM, 241 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM, 242 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM, 243 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM, 244 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM, 245 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM, 246 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM, 247 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM, 248 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM, 249 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM, 250 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM, 251 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM, 252 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM, 253 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM, 254 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM, 255 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM, 256 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM, 257 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM, 258 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM, 259 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM, 260 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM, 261 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM, 262 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM, 263 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM, 264 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM, 265 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM, 266 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM, 267 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM, 268 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM, 269 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM, 270 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM, 271 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM, 272 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM, 273 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM, 274 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM, 275 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM, 276 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM, 277 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM, 278 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM, 279 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM, 280 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM, 281 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM, 282 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM, 283 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM, 284 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM, 285 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM, 286 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM, 287 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM, 288 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM, 289 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM, 290 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM, 291 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM, 292 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM, 293 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM, 294 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM, 295 
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM, 296 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM, 297 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM, 298 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM, 299 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM, 300 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM, 301 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM, 302 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM, 303 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM, 304 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM, 305 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM, 306 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM, 307 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM, 308 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM, 309 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM, 310 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM, 311 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM, 312 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM, 313 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM, 314 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM, 315 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM, 316 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM, 317 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM, 318 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM, 319 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM, 320 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM, 321 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM, 322 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM, 323 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM, 324 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM, 325 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM, 326 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM, 327 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM, 328 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0, 329 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0, 330 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0, 331 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0, 332 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1, 333 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1, 334 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1, 335 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1, 336 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0, 337 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0, 338 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0, 339 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0, 340 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1, 341 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1, 342 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1, 343 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1, 344 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0, 345 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0, 346 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0, 347 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0, 348 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1, 349 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1, 350 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1, 351 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1, 352 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0, 353 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0, 354 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0, 355 
[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0, 356 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1, 357 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1, 358 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1, 359 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1, 360 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0, 361 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0, 362 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0, 363 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0, 364 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1, 365 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1, 366 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1, 367 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1, 368 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0, 369 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0, 370 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0, 371 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0, 372 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1, 373 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1, 374 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1, 375 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1, 376 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0, 377 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0, 378 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0, 379 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0, 380 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1, 381 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1, 382 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1, 383 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1, 384 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0, 385 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0, 386 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0, 387 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0, 388 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1, 389 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1, 390 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1, 391 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1, 392 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0, 393 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0, 394 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0, 395 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0, 396 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1, 397 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1, 398 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1, 399 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1, 400 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0, 401 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0, 402 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0, 403 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0, 404 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1, 405 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1, 406 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1, 407 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1, 408 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0, 409 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0, 410 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0, 411 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0, 412 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1, 413 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1, 414 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1, 415 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1, 416 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0, 417 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0, 418 [GAUDI2_QUEUE_ID_NIC_22_2] = 
GAUDI2_EVENT_NIC11_QM0, 419 [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0, 420 [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1, 421 [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1, 422 [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1, 423 [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1, 424 [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 425 [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 426 [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 427 [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM, 428 [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 429 [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 430 [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM, 431 [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM 432 }; 433 434 static const int gaudi2_dma_core_async_event_id[] = { 435 [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE, 436 [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE, 437 [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE, 438 [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE, 439 [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE, 440 [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE, 441 [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE, 442 [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE, 443 [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE, 444 [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE, 445 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE, 446 }; 447 448 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = { 449 "qman sei intr", 450 "arc sei intr" 451 }; 452 453 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = { 454 "AXI_TERMINATOR WR", 455 "AXI_TERMINATOR RD", 456 "AXI SPLIT SEI Status" 457 }; 458 459 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = { 460 "cbu_bresp_sei_intr_cause", 461 "cbu_rresp_sei_intr_cause", 462 "lbu_bresp_sei_intr_cause", 463 "lbu_rresp_sei_intr_cause", 464 "cbu_axi_split_intr_cause", 465 "lbu_axi_split_intr_cause", 466 "arc_ip_excptn_sei_intr_cause", 467 "dmi_bresp_sei_intr_cause", 468 "aux2apb_err_sei_intr_cause", 469 "cfg_lbw_wr_terminated_intr_cause", 470 "cfg_lbw_rd_terminated_intr_cause", 471 "cfg_dccm_wr_terminated_intr_cause", 472 "cfg_dccm_rd_terminated_intr_cause", 473 "cfg_hbw_rd_terminated_intr_cause" 474 }; 475 476 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = { 477 "msix_vcd_hbw_sei", 478 "msix_l2c_hbw_sei", 479 "msix_nrm_hbw_sei", 480 "msix_abnrm_hbw_sei", 481 "msix_vcd_lbw_sei", 482 "msix_l2c_lbw_sei", 483 "msix_nrm_lbw_sei", 484 "msix_abnrm_lbw_sei", 485 "apb_vcd_lbw_sei", 486 "apb_l2c_lbw_sei", 487 "apb_nrm_lbw_sei", 488 "apb_abnrm_lbw_sei", 489 "dec_sei", 490 "dec_apb_sei", 491 "trc_apb_sei", 492 "lbw_mstr_if_sei", 493 "axi_split_bresp_err_sei", 494 "hbw_axi_wr_viol_sei", 495 "hbw_axi_rd_viol_sei", 496 "lbw_axi_wr_viol_sei", 497 "lbw_axi_rd_viol_sei", 498 "vcd_spi", 499 "l2c_spi", 500 "nrm_spi", 501 "abnrm_spi", 502 }; 503 504 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = { 505 "PQ AXI HBW error", 506 "CQ AXI HBW error", 507 "CP AXI HBW error", 508 "CP error due to undefined OPCODE", 509 "CP encountered STOP OPCODE", 510 "CP AXI LBW error", 511 "CP WRREG32 or WRBULK returned error", 512 "N/A", 513 "FENCE 0 inc over max value and clipped", 514 "FENCE 1 inc over max value and clipped", 515 "FENCE 2 inc over max value and clipped", 516 "FENCE 3 inc over max value and clipped", 517 "FENCE 0 dec 
under min value and clipped", 518 "FENCE 1 dec under min value and clipped", 519 "FENCE 2 dec under min value and clipped", 520 "FENCE 3 dec under min value and clipped", 521 "CPDMA Up overflow", 522 "PQC L2H error" 523 }; 524 525 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = { 526 "RSVD0", 527 "CQ AXI HBW error", 528 "CP AXI HBW error", 529 "CP error due to undefined OPCODE", 530 "CP encountered STOP OPCODE", 531 "CP AXI LBW error", 532 "CP WRREG32 or WRBULK returned error", 533 "N/A", 534 "FENCE 0 inc over max value and clipped", 535 "FENCE 1 inc over max value and clipped", 536 "FENCE 2 inc over max value and clipped", 537 "FENCE 3 inc over max value and clipped", 538 "FENCE 0 dec under min value and clipped", 539 "FENCE 1 dec under min value and clipped", 540 "FENCE 2 dec under min value and clipped", 541 "FENCE 3 dec under min value and clipped", 542 "CPDMA Up overflow", 543 "RSVD17", 544 "CQ_WR_IFIFO_CI_ERR", 545 "CQ_WR_CTL_CI_ERR", 546 "ARC_CQF_RD_ERR", 547 "ARC_CQ_WR_IFIFO_CI_ERR", 548 "ARC_CQ_WR_CTL_CI_ERR", 549 "ARC_AXI_ERR", 550 "CP_SWITCH_WDT_ERR" 551 }; 552 553 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = { 554 "Choice push while full error", 555 "Choice Q watchdog error", 556 "MSG AXI LBW returned with error" 557 }; 558 559 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = { 560 "qm_axi_err", 561 "qm_trace_fence_events", 562 "qm_sw_err", 563 "qm_cp_sw_stop", 564 "lbw_mstr_rresp_err", 565 "lbw_mstr_bresp_err", 566 "lbw_msg_slverr", 567 "hbw_msg_slverr", 568 "wbc_slverr", 569 "hbw_mstr_rresp_err", 570 "hbw_mstr_bresp_err", 571 "sb_resp_intr", 572 "mrsb_resp_intr", 573 "core_dw_status_0", 574 "core_dw_status_1", 575 "core_dw_status_2", 576 "core_dw_status_3", 577 "core_dw_status_4", 578 "core_dw_status_5", 579 "core_dw_status_6", 580 "core_dw_status_7", 581 "async_arc2cpu_sei_intr", 582 }; 583 584 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = { 585 "tpc_address_exceed_slm", 586 "tpc_div_by_0", 587 "tpc_spu_mac_overflow", 588 "tpc_spu_addsub_overflow", 589 "tpc_spu_abs_overflow", 590 "tpc_spu_fma_fp_dst_nan", 591 "tpc_spu_fma_fp_dst_inf", 592 "tpc_spu_convert_fp_dst_nan", 593 "tpc_spu_convert_fp_dst_inf", 594 "tpc_spu_fp_dst_denorm", 595 "tpc_vpu_mac_overflow", 596 "tpc_vpu_addsub_overflow", 597 "tpc_vpu_abs_overflow", 598 "tpc_vpu_convert_fp_dst_nan", 599 "tpc_vpu_convert_fp_dst_inf", 600 "tpc_vpu_fma_fp_dst_nan", 601 "tpc_vpu_fma_fp_dst_inf", 602 "tpc_vpu_fp_dst_denorm", 603 "tpc_assertions", 604 "tpc_illegal_instruction", 605 "tpc_pc_wrap_around", 606 "tpc_qm_sw_err", 607 "tpc_hbw_rresp_err", 608 "tpc_hbw_bresp_err", 609 "tpc_lbw_rresp_err", 610 "tpc_lbw_bresp_err", 611 "st_unlock_already_locked", 612 "invalid_lock_access", 613 "LD_L protection violation", 614 "ST_L protection violation", 615 }; 616 617 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = { 618 "agu_resp_intr", 619 "qman_axi_err", 620 "wap sei (wbc axi err)", 621 "arc sei", 622 "cfg access error", 623 "qm_sw_err", 624 "sbte_dbg_intr_0", 625 "sbte_dbg_intr_1", 626 "sbte_dbg_intr_2", 627 "sbte_dbg_intr_3", 628 "sbte_dbg_intr_4", 629 "sbte_prtn_intr_0", 630 "sbte_prtn_intr_1", 631 "sbte_prtn_intr_2", 632 "sbte_prtn_intr_3", 633 "sbte_prtn_intr_4", 634 }; 635 636 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { 637 "i0", 638 "i1", 639 "i2", 640 "i3", 641 "i4", 642 }; 643 644 static const 
char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = { 645 "WBC ERR RESP_0", 646 "WBC ERR RESP_1", 647 "AP SOURCE POS INF", 648 "AP SOURCE NEG INF", 649 "AP SOURCE NAN", 650 "AP RESULT POS INF", 651 "AP RESULT NEG INF", 652 }; 653 654 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = { 655 "HBW Read returned with error RRESP", 656 "HBW write returned with error BRESP", 657 "LBW write returned with error BRESP", 658 "descriptor_fifo_overflow", 659 "KDMA SB LBW Read returned with error", 660 "KDMA WBC LBW Write returned with error", 661 "TRANSPOSE ENGINE DESC FIFO OVERFLOW", 662 "WRONG CFG FOR COMMIT IN LIN DMA" 663 }; 664 665 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = { 666 "HBW/LBW Read returned with error RRESP", 667 "HBW/LBW write returned with error BRESP", 668 "LBW write returned with error BRESP", 669 "descriptor_fifo_overflow", 670 "KDMA SB LBW Read returned with error", 671 "KDMA WBC LBW Write returned with error", 672 "TRANSPOSE ENGINE DESC FIFO OVERFLOW", 673 "WRONG CFG FOR COMMIT IN LIN DMA" 674 }; 675 676 struct gaudi2_sm_sei_cause_data { 677 const char *cause_name; 678 const char *log_name; 679 }; 680 681 static const struct gaudi2_sm_sei_cause_data 682 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = { 683 {"calculated SO value overflow/underflow", "SOB ID"}, 684 {"payload address of monitor is not aligned to 4B", "monitor addr"}, 685 {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"}, 686 }; 687 688 static const char * const 689 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = { 690 "LATENCY_RD_OUT_FIFO_OVERRUN", 691 "LATENCY_WR_OUT_FIFO_OVERRUN", 692 }; 693 694 static const char * const 695 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = { 696 "LATENCY_RD_OUT_FIFO_OVERRUN", 697 "LATENCY_WR_OUT_FIFO_OVERRUN", 698 }; 699 700 static const char * const 701 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = { 702 "AXI drain HBW", 703 "AXI drain LBW", 704 }; 705 706 static const char * const 707 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = { 708 "HBW error response", 709 "LBW error response", 710 "TLP is blocked by RR" 711 }; 712 713 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = { 714 [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE, 715 [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE, 716 [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE, 717 [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE, 718 [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE, 719 [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE, 720 [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE, 721 [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE, 722 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE, 723 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE, 724 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE, 725 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE, 726 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE, 727 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE, 728 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE, 729 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE, 730 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE, 731 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE, 732 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE, 733 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE, 734 
[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE, 735 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE, 736 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE, 737 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE, 738 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE, 739 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE, 740 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE, 741 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE, 742 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE, 743 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE, 744 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE, 745 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE, 746 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE, 747 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE, 748 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE, 749 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE, 750 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE, 751 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE, 752 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE, 753 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE, 754 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE, 755 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE, 756 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE, 757 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE, 758 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE, 759 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE, 760 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE, 761 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE, 762 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE, 763 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE, 764 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE, 765 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE, 766 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE, 767 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE, 768 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE, 769 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE, 770 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE, 771 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE, 772 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE, 773 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE, 774 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE, 775 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE, 776 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE, 777 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE, 778 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE, 779 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE, 780 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE, 781 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE, 782 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE, 783 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE, 784 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE, 785 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE, 786 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE, 787 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE, 788 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE, 789 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE, 790 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE, 791 
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE, 792 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE, 793 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE, 794 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE, 795 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE, 796 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE, 797 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE, 798 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE, 799 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE, 800 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE, 801 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE, 802 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE, 803 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE, 804 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE, 805 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE, 806 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE, 807 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE, 808 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE, 809 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE, 810 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE, 811 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE, 812 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE, 813 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE, 814 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE, 815 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE, 816 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE, 817 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE, 818 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE, 819 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE, 820 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE, 821 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE, 822 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE, 823 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE, 824 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE, 825 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE, 826 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE, 827 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE, 828 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE, 829 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE, 830 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE, 831 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE, 832 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE, 833 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE, 834 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE, 835 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE, 836 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE, 837 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE, 838 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE, 839 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE, 840 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE, 841 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE, 842 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE, 843 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE, 844 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE, 845 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE, 846 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE, 847 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE, 
848 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE, 849 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE, 850 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE, 851 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE, 852 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE, 853 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE, 854 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE, 855 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE, 856 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE, 857 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE, 858 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE, 859 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE, 860 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE, 861 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE, 862 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE, 863 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE, 864 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE, 865 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE, 866 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE, 867 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE, 868 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE, 869 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE, 870 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE, 871 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE, 872 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE, 873 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE, 874 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE, 875 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE, 876 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE, 877 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE, 878 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE, 879 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE, 880 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE, 881 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE, 882 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE, 883 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE, 884 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE, 885 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE, 886 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE, 887 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE, 888 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE, 889 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE, 890 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE, 891 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE, 892 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE, 893 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE, 894 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE, 895 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE, 896 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE, 897 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE, 898 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE, 899 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE, 900 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE, 901 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE, 902 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE, 903 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE, 904 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE, 905 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE, 906 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE, 907 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE, 908 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE, 909 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE, 910 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE, 911 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE, 912 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE, 913 [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE, 914 [GAUDI2_QUEUE_ID_NIC_11_0] 
= mmNIC5_QM1_BASE, 915 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE, 916 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE, 917 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE, 918 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE, 919 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE, 920 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE, 921 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE, 922 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE, 923 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE, 924 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE, 925 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE, 926 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE, 927 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE, 928 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE, 929 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE, 930 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE, 931 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE, 932 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE, 933 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE, 934 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE, 935 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE, 936 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE, 937 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE, 938 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE, 939 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE, 940 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE, 941 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE, 942 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE, 943 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE, 944 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE, 945 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE, 946 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE, 947 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE, 948 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE, 949 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE, 950 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE, 951 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE, 952 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE, 953 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE, 954 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE, 955 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE, 956 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE, 957 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE, 958 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE, 959 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE, 960 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE, 961 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE, 962 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE, 963 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE, 964 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE, 965 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE, 966 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE, 967 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE, 968 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE, 969 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE, 970 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE, 971 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE, 972 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE, 973 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE 974 }; 975 976 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = { 977 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE, 978 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE, 979 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE, 980 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE, 981 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE, 982 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE, 983 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE, 984 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE, 985 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE, 986 
[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 987 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 988 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 989 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 990 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 991 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 992 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 993 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 994 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 995 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 996 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 997 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 998 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 999 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1000 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1001 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1002 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1003 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1004 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1005 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1006 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1007 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1008 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1009 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1010 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1011 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1012 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1013 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1014 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1015 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1016 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1017 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1018 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1019 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1020 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1021 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1022 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1023 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1024 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1025 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1026 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1027 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1028 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1029 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1030 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1031 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1032 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1033 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1034 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1035 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1036 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1037 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1038 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1039 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1040 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1041 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE, 1042 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE, 1043 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1044 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE, 1045 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1046 }; 1047 1048 static const 
u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1049 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1050 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1051 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1052 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1053 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1054 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1055 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1056 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1057 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1058 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1059 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1060 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1061 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1062 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1063 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1064 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1065 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1066 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1067 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1068 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1069 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1070 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1071 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1072 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1073 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1074 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1075 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1076 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1077 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1078 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1079 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1080 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1081 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1082 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1083 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1084 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1085 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1086 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1087 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1088 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1089 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1090 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1091 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1092 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1093 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1094 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1095 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1096 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1097 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1098 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1099 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1100 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1101 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1102 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1103 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1104 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1105 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1106 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE, 1107 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1108 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE, 1109 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1110 [CPU_ID_NIC_QMAN_ARC16] = 
mmNIC8_QM_DCCM0_BASE, 1111 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1112 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1113 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1114 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1115 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1116 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1117 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1118 }; 1119 1120 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1121 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1122 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1123 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1124 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1125 }; 1126 1127 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1128 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1129 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1130 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1131 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1132 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1133 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1134 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1135 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1136 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1137 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1138 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1139 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1140 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1141 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1142 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1143 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1144 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1145 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1146 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1147 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1148 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1149 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1150 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1151 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1152 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1153 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1154 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1155 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1156 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1157 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1158 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1159 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1160 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1161 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1162 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1163 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1164 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1165 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1166 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1167 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1168 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5, 1169 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 1170 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1171 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5, 1172 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1173 
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1174 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1175 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1176 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1177 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1178 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1179 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1180 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1181 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1182 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1183 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1184 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1185 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1186 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1187 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1188 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1189 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1190 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1191 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1192 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1193 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1194 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1195 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1196 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1197 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1198 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1199 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1200 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1201 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1202 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1203 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1204 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1205 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1206 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1207 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1208 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1209 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1210 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1211 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1212 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1213 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1214 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1215 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1216 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1217 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1218 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1219 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1220 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1221 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1222 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1223 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1224 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1225 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1226 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12, 1227 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1228 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1229 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = 
CPU_ID_TPC_QMAN_ARC13, 1230 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1231 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1232 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1233 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1234 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1235 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1236 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1237 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1238 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1239 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1240 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1241 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1242 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1243 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1244 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1245 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1246 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1247 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1248 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1249 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1250 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1251 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1252 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1253 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1254 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1255 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1256 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1257 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1258 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1259 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1260 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1261 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1262 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1263 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1264 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1265 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1266 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1267 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1268 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1269 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1270 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1271 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1272 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1273 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1274 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1275 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1276 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23, 1282 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1283 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1284 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0, 1285 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1286 
[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1287 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1288 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1289 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1290 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1291 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1292 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1293 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1294 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1295 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1296 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1297 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1298 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1299 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1300 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1301 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1302 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1303 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1304 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1305 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1306 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1307 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1308 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1309 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1310 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1311 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1312 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1313 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1314 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1315 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1316 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1317 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1318 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1319 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1320 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1321 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1322 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1323 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1324 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1325 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1326 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1327 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1328 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1329 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1330 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1331 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1332 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1333 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1334 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1335 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1336 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1337 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1338 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1339 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1340 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1341 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1342 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1343 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1344 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1345 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1346 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15, 1347 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1348 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1349 [GAUDI2_QUEUE_ID_NIC_16_1] = 
CPU_ID_NIC_QMAN_ARC16, 1350 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1351 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1352 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1353 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1354 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1355 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1356 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1357 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1358 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1359 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1360 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1361 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1362 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1363 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1364 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1365 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1366 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1367 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1368 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1369 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1370 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1371 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1372 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1373 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1374 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1375 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1376 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1377 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1378 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1379 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1380 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1381 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1382 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1383 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1384 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1385 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1386 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1387 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1388 }; 1389 1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1391 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1392 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1393 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1394 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1395 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1396 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1397 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1398 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1399 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1400 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1401 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1402 }; 1403 1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1405 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1406 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1407 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1408 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1409 }; 1410 1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1412 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1413 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1414 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE, 1415 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1416 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE, 1417 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1418 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1419 [TPC_ID_DCORE1_TPC1] = 
mmDCORE1_TPC1_CFG_BASE, 1420 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1421 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1422 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1423 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1424 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1425 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1426 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1427 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1428 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1429 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1430 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1431 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1432 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1433 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1434 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1435 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1436 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1437 }; 1438 1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1440 [ROTATOR_ID_0] = mmROT0_BASE, 1441 [ROTATOR_ID_1] = mmROT1_BASE 1442 }; 1443 1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1445 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1446 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1447 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1448 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1449 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1450 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1451 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1452 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1453 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1454 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1455 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1456 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0, 1457 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0, 1458 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0, 1459 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0, 1460 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0, 1461 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0, 1462 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0, 1463 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0, 1464 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0, 1465 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0, 1466 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0, 1467 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0, 1468 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0, 1469 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0, 1470 }; 1471 1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { 1473 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0, 1474 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, 1475 }; 1476 1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1478 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1479 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1480 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1481 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1482 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1483 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1484 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1485 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1486 }; 1487 1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1489 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1490 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1491 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1492 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal", 1493 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 
abnormal", 1494 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1495 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1496 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1497 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1498 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1499 }; 1500 1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = { 1502 RTR_ID_X_Y(2, 4), 1503 RTR_ID_X_Y(3, 4), 1504 RTR_ID_X_Y(4, 4), 1505 RTR_ID_X_Y(5, 4), 1506 RTR_ID_X_Y(6, 4), 1507 RTR_ID_X_Y(7, 4), 1508 RTR_ID_X_Y(8, 4), 1509 RTR_ID_X_Y(9, 4), 1510 RTR_ID_X_Y(10, 4), 1511 RTR_ID_X_Y(11, 4), 1512 RTR_ID_X_Y(12, 4), 1513 RTR_ID_X_Y(13, 4), 1514 RTR_ID_X_Y(14, 4), 1515 RTR_ID_X_Y(15, 4), 1516 RTR_ID_X_Y(16, 4), 1517 RTR_ID_X_Y(17, 4), 1518 RTR_ID_X_Y(2, 11), 1519 RTR_ID_X_Y(3, 11), 1520 RTR_ID_X_Y(4, 11), 1521 RTR_ID_X_Y(5, 11), 1522 RTR_ID_X_Y(6, 11), 1523 RTR_ID_X_Y(7, 11), 1524 RTR_ID_X_Y(8, 11), 1525 RTR_ID_X_Y(9, 11), 1526 RTR_ID_X_Y(0, 0),/* 24 no id */ 1527 RTR_ID_X_Y(0, 0),/* 25 no id */ 1528 RTR_ID_X_Y(0, 0),/* 26 no id */ 1529 RTR_ID_X_Y(0, 0),/* 27 no id */ 1530 RTR_ID_X_Y(14, 11), 1531 RTR_ID_X_Y(15, 11), 1532 RTR_ID_X_Y(16, 11), 1533 RTR_ID_X_Y(17, 11) 1534 }; 1535 1536 enum rtr_id { 1537 DCORE0_RTR0, 1538 DCORE0_RTR1, 1539 DCORE0_RTR2, 1540 DCORE0_RTR3, 1541 DCORE0_RTR4, 1542 DCORE0_RTR5, 1543 DCORE0_RTR6, 1544 DCORE0_RTR7, 1545 DCORE1_RTR0, 1546 DCORE1_RTR1, 1547 DCORE1_RTR2, 1548 DCORE1_RTR3, 1549 DCORE1_RTR4, 1550 DCORE1_RTR5, 1551 DCORE1_RTR6, 1552 DCORE1_RTR7, 1553 DCORE2_RTR0, 1554 DCORE2_RTR1, 1555 DCORE2_RTR2, 1556 DCORE2_RTR3, 1557 DCORE2_RTR4, 1558 DCORE2_RTR5, 1559 DCORE2_RTR6, 1560 DCORE2_RTR7, 1561 DCORE3_RTR0, 1562 DCORE3_RTR1, 1563 DCORE3_RTR2, 1564 DCORE3_RTR3, 1565 DCORE3_RTR4, 1566 DCORE3_RTR5, 1567 DCORE3_RTR6, 1568 DCORE3_RTR7, 1569 }; 1570 1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1572 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1573 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1574 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1575 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1576 DCORE0_RTR0 1577 }; 1578 1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { 1580 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1581 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1582 }; 1583 1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { 1585 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1586 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1587 }; 1588 1589 struct sft_info { 1590 u8 interface_id; 1591 u8 dcore_id; 1592 }; 1593 1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1595 {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3}, 1596 }; 1597 1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { 1599 DCORE0_RTR0, DCORE0_RTR0 1600 }; 1601 1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { 1603 DCORE2_RTR0, DCORE3_RTR7 1604 }; 1605 1606 struct mme_initiators_rtr_id { 1607 u32 wap0; 1608 u32 wap1; 1609 u32 write; 1610 u32 read; 1611 u32 sbte0; 1612 u32 sbte1; 1613 u32 sbte2; 1614 u32 sbte3; 1615 u32 sbte4; 1616 }; 1617 1618 enum mme_initiators { 1619 MME_WAP0 = 0, 1620 MME_WAP1, 1621 MME_WRITE, 1622 MME_READ, 1623 MME_SBTE0, 1624 MME_SBTE1, 1625 
MME_SBTE2, 1626 MME_SBTE3, 1627 MME_SBTE4, 1628 MME_INITIATORS_MAX 1629 }; 1630 1631 static const struct mme_initiators_rtr_id 1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1633 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1634 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1635 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1636 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1637 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1638 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1639 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1640 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1641 }; 1642 1643 enum razwi_event_sources { 1644 RAZWI_TPC, 1645 RAZWI_MME, 1646 RAZWI_EDMA, 1647 RAZWI_PDMA, 1648 RAZWI_NIC, 1649 RAZWI_DEC, 1650 RAZWI_ROT 1651 }; 1652 1653 struct hbm_mc_error_causes { 1654 u32 mask; 1655 char cause[50]; 1656 }; 1657 1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 1659 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 1660 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 1661 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 1662 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 1663 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 1664 }; 1665 1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 1667 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 1668 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 1669 [HBM_SEI_READ_ERR] = "SEI read data error", 1670 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 1671 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 1672 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 1673 [HBM_SEI_DFI] = "SEI DFI error", 1674 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 1675 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 1676 }; 1677 1678 struct mmu_spi_sei_cause { 1679 char cause[50]; 1680 int clear_bit; 1681 }; 1682 1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 1684 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 1685 {"page access", 1}, /* INTERRUPT_CLR[1] */ 1686 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 1687 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 1688 {"mmu rei0", -1}, /* no clear register bit */ 1689 {"mmu rei1", -1}, /* no clear register bit */ 1690 {"stlb rei0", -1}, /* no clear register bit */ 1691 {"stlb rei1", -1}, /* no clear register bit */ 1692 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 1693 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 1694 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 1695 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 1696 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1697 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1698 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1699 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 1700 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 1701 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 1702 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 1703 }; 1704 1705 struct gaudi2_cache_invld_params { 1706 u64 start_va; 1707 u64 end_va; 1708 u32 inv_start_val; 1709 u32 flags; 1710 bool range_invalidation; 1711 }; 1712 1713 struct gaudi2_tpc_idle_data { 1714 struct engines_data *e; 1715 unsigned long *mask; 1716 bool *is_idle; 1717 const char *tpc_fmt; 1718 }; 1719 1720 struct gaudi2_tpc_mmu_data { 1721 u32 rw_asid; 1722 }; 1723 1724 static s64 
gaudi2_state_dump_specs_props[SP_MAX] = {0}; 1725 1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 1733 bool is_memset); 1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 1735 1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 1737 { 1738 1739 } 1740 1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 1742 { 1743 return sizeof(struct packet_msg_short); 1744 } 1745 1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 1747 { 1748 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 1749 } 1750 1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 1752 { 1753 struct asic_fixed_properties *prop = &hdev->asic_prop; 1754 int dcore, inst, tpc_seq; 1755 u32 offset; 1756 1757 /* init the return code */ 1758 ctx->rc = 0; 1759 1760 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 1761 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 1762 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 1763 1764 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 1765 continue; 1766 1767 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 1768 1769 ctx->fn(hdev, dcore, inst, offset, ctx); 1770 if (ctx->rc) { 1771 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 1772 dcore, inst); 1773 return; 1774 } 1775 } 1776 } 1777 1778 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 1779 return; 1780 1781 /* special check for PCI TPC (DCORE0_TPC6) */ 1782 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 1783 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 1784 if (ctx->rc) 1785 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 1786 } 1787 1788 static bool gaudi2_host_phys_addr_valid(u64 addr) 1789 { 1790 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 1791 return true; 1792 1793 return false; 1794 } 1795 1796 static int set_number_of_functional_hbms(struct hl_device *hdev) 1797 { 1798 struct asic_fixed_properties *prop = &hdev->asic_prop; 1799 u8 faulty_hbms = hweight64(hdev->dram_binning); 1800 1801 /* check if all HBMs should be used */ 1802 if (!faulty_hbms) { 1803 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 1804 prop->num_functional_hbms = GAUDI2_HBM_NUM; 1805 return 0; 1806 } 1807 1808 /* 1809 * check for error condition in which number of binning 1810 * candidates is higher than the maximum supported by the 1811 * driver (in which case binning mask shall be ignored and driver will 1812 * set the default) 1813 */ 1814 if (faulty_hbms > MAX_FAULTY_HBMS) { 1815 dev_err(hdev->dev, 1816 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 1817 MAX_FAULTY_HBMS, hdev->dram_binning); 1818 return -EINVAL; 1819 } 1820 1821 /* 1822 * by default, number of functional HBMs in Gaudi2 is always 1823 * GAUDI2_HBM_NUM - 1. 
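	 * For example, with GAUDI2_HBM_NUM == 6 and a single bit set in
	 * hdev->dram_binning, hweight64() reports one faulty HBM and
	 * num_functional_hbms is set to 5 below.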
1824 */ 1825 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms; 1826 return 0; 1827 } 1828 1829 static int gaudi2_set_dram_properties(struct hl_device *hdev) 1830 { 1831 struct asic_fixed_properties *prop = &hdev->asic_prop; 1832 u32 basic_hbm_page_size; 1833 int rc; 1834 1835 rc = set_number_of_functional_hbms(hdev); 1836 if (rc) 1837 return -EINVAL; 1838 1839 /* 1840 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround 1841 * in which we are using x16 bigger page size to be able to populate the entire 1842 * HBM mappings in the TLB 1843 */ 1844 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M; 1845 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size; 1846 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 1847 prop->dram_size = prop->num_functional_hbms * SZ_16G; 1848 prop->dram_base_address = DRAM_PHYS_BASE; 1849 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 1850 prop->dram_supports_virtual_memory = true; 1851 1852 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size; 1853 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK; 1854 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START; 1855 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END; 1856 1857 /* since DRAM page size differs from DMMU page size we need to allocate 1858 * DRAM memory in units of dram_page size and mapping this memory in 1859 * units of DMMU page size. we overcome this size mismatch using a 1860 * scrambling routine which takes a DRAM page and converts it to a DMMU 1861 * page. 1862 * We therefore: 1863 * 1. partition the virtual address space to DRAM-page (whole) pages. 1864 * (suppose we get n such pages) 1865 * 2. limit the amount of virtual address space we got from 1 above to 1866 * a multiple of 64M as we don't want the scrambled address to cross 1867 * the DRAM virtual address space. 1868 * ( m = (n * DRAM_page_size) / DMMU_page_size). 1869 * 3. 
determine the end address accordingly
	 *    end_addr = start_addr + m * 48M
	 *
	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}

static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;

	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->dmmu.last_mask =
LAST_MASK; 1960 prop->dmmu.host_resident = 1; 1961 /* TODO: will be duplicated until implementing per-MMU props */ 1962 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 1963 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1964 1965 /* 1966 * this is done in order to be able to validate FW descriptor (i.e. validating that 1967 * the addresses and allocated space for FW image does not cross memory bounds). 1968 * for this reason we set the DRAM size to the minimum possible and later it will 1969 * be modified according to what reported in the cpucp info packet 1970 */ 1971 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 1972 1973 hdev->pmmu_huge_range = true; 1974 prop->pmmu.host_resident = 1; 1975 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 1976 prop->pmmu.last_mask = LAST_MASK; 1977 /* TODO: will be duplicated until implementing per-MMU props */ 1978 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 1979 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 1980 1981 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 1982 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 1983 prop->hints_host_hpage_reserved_va_range.start_addr = 1984 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 1985 prop->hints_host_hpage_reserved_va_range.end_addr = 1986 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 1987 1988 if (PAGE_SIZE == SZ_64K) { 1989 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 1990 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 1991 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 1992 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 1993 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 1994 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 1995 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 1996 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 1997 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 1998 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 1999 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2000 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2001 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2002 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2003 prop->pmmu.page_size = PAGE_SIZE_64KB; 2004 2005 /* shifts and masks are the same in PMMU and HPMMU */ 2006 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2007 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2008 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2009 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2010 } else { 2011 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2012 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2013 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2014 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2015 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2016 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2017 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2018 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2019 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2020 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2021 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2022 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2023 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2024 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2025 prop->pmmu.page_size = PAGE_SIZE_4KB; 2026 2027 /* shifts and masks are the same in PMMU and HPMMU */ 2028 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2029 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2030 
prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2031 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2032 } 2033 2034 prop->num_engine_cores = CPU_ID_MAX; 2035 prop->cfg_size = CFG_SIZE; 2036 prop->max_asid = MAX_ASID; 2037 prop->num_of_events = GAUDI2_EVENT_SIZE; 2038 2039 prop->dc_power_default = DC_POWER_DEFAULT; 2040 2041 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2042 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2043 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2044 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2045 2046 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2047 2048 prop->mme_master_slave_mode = 1; 2049 2050 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2051 (num_sync_stream_queues * HL_RSVD_SOBS); 2052 2053 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2054 (num_sync_stream_queues * HL_RSVD_MONS); 2055 2056 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2057 2058 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2059 2060 prop->fw_cpu_boot_dev_sts0_valid = false; 2061 prop->fw_cpu_boot_dev_sts1_valid = false; 2062 prop->hard_reset_done_by_fw = false; 2063 prop->gic_interrupts_enable = true; 2064 2065 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2066 2067 prop->max_dec = NUMBER_OF_DEC; 2068 2069 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2070 2071 prop->dma_mask = 64; 2072 2073 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; 2074 2075 return 0; 2076 } 2077 2078 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2079 { 2080 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2081 bool is_wc[3] = {false, false, true}; 2082 int rc; 2083 2084 rc = hl_pci_bars_map(hdev, name, is_wc); 2085 if (rc) 2086 return rc; 2087 2088 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2089 2090 return 0; 2091 } 2092 2093 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2094 { 2095 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2096 struct hl_inbound_pci_region pci_region; 2097 u64 old_addr = addr; 2098 int rc; 2099 2100 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2101 return old_addr; 2102 2103 if (hdev->asic_prop.iatu_done_by_fw) 2104 return U64_MAX; 2105 2106 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2107 pci_region.mode = PCI_BAR_MATCH_MODE; 2108 pci_region.bar = DRAM_BAR_ID; 2109 pci_region.addr = addr; 2110 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2111 if (rc) 2112 return U64_MAX; 2113 2114 if (gaudi2) { 2115 old_addr = gaudi2->dram_bar_cur_addr; 2116 gaudi2->dram_bar_cur_addr = addr; 2117 } 2118 2119 return old_addr; 2120 } 2121 2122 static int gaudi2_init_iatu(struct hl_device *hdev) 2123 { 2124 struct hl_inbound_pci_region inbound_region; 2125 struct hl_outbound_pci_region outbound_region; 2126 u32 bar_addr_low, bar_addr_high; 2127 int rc; 2128 2129 if (hdev->asic_prop.iatu_done_by_fw) 2130 return 0; 2131 2132 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2133 * We must map this region in BAR match mode in order to 2134 * fetch BAR physical base address 2135 */ 2136 inbound_region.mode = PCI_BAR_MATCH_MODE; 2137 inbound_region.bar = SRAM_CFG_BAR_ID; 2138 /* Base address must be aligned to Bar size which is 256 MB */ 2139 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 2140 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2141 if (rc) 2142 return rc; 2143 2144 /* Fetch physical BAR address */ 2145 
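	/* The 64-bit BAR base is split across two DBI registers: the high dword is
	 * read as-is and the low dword has its PCI BAR flag bits (3:0) masked off.
	 */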
bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2146 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2147 2148 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2149 2150 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2151 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2152 inbound_region.bar = SRAM_CFG_BAR_ID; 2153 inbound_region.offset_in_bar = 0; 2154 inbound_region.addr = STM_FLASH_BASE_ADDR; 2155 inbound_region.size = CFG_REGION_SIZE; 2156 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2157 if (rc) 2158 return rc; 2159 2160 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2161 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2162 inbound_region.bar = SRAM_CFG_BAR_ID; 2163 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2164 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2165 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2166 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2167 if (rc) 2168 return rc; 2169 2170 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2171 inbound_region.mode = PCI_BAR_MATCH_MODE; 2172 inbound_region.bar = DRAM_BAR_ID; 2173 inbound_region.addr = DRAM_PHYS_BASE; 2174 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2175 if (rc) 2176 return rc; 2177 2178 /* Outbound Region 0 - Point to Host */ 2179 outbound_region.addr = HOST_PHYS_BASE_0; 2180 outbound_region.size = HOST_PHYS_SIZE_0; 2181 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2182 2183 return rc; 2184 } 2185 2186 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2187 { 2188 return RREG32(mmHW_STATE); 2189 } 2190 2191 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2192 { 2193 struct asic_fixed_properties *prop = &hdev->asic_prop; 2194 2195 /* 2196 * check for error condition in which number of binning candidates 2197 * is higher than the maximum supported by the driver 2198 */ 2199 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2200 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2201 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2202 hdev->tpc_binning); 2203 return -EINVAL; 2204 } 2205 2206 prop->tpc_binning_mask = hdev->tpc_binning; 2207 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2208 2209 return 0; 2210 } 2211 2212 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2213 { 2214 struct asic_fixed_properties *prop = &hdev->asic_prop; 2215 struct hw_queue_properties *q_props = prop->hw_queues_props; 2216 u64 tpc_binning_mask; 2217 u8 subst_idx = 0; 2218 int i, rc; 2219 2220 rc = gaudi2_tpc_binning_init_prop(hdev); 2221 if (rc) 2222 return rc; 2223 2224 tpc_binning_mask = prop->tpc_binning_mask; 2225 2226 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2227 u8 subst_seq, binned, qid_base; 2228 2229 if (tpc_binning_mask == 0) 2230 break; 2231 2232 if (subst_idx == 0) { 2233 subst_seq = TPC_ID_DCORE0_TPC6; 2234 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2235 } else { 2236 subst_seq = TPC_ID_DCORE3_TPC5; 2237 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2238 } 2239 2240 2241 /* clear bit from mask */ 2242 binned = __ffs(tpc_binning_mask); 2243 /* 2244 * Coverity complains about possible out-of-bound access in 2245 * clear_bit 2246 */ 2247 if (binned >= TPC_ID_SIZE) { 2248 dev_err(hdev->dev, 2249 "Invalid binned TPC (binning mask: %llx)\n", 2250 tpc_binning_mask); 2251 return -EINVAL; 2252 } 2253 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2254 2255 /* also clear 
the substitute TPC's bit from the enabled mask */
		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);

		/* bin the substitute TPC's queues */
		q_props[qid_base].binned = 1;
		q_props[qid_base + 1].binned = 1;
		q_props[qid_base + 2].binned = 1;
		q_props[qid_base + 3].binned = 1;

		subst_idx++;
	}

	return 0;
}

static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 num_faulty;

	num_faulty = hweight32(hdev->decoder_binning);

	/*
	 * check for error condition in which number of binning candidates
	 * is higher than the maximum supported by the driver
	 */
	if (num_faulty > MAX_FAULTY_DECODERS) {
		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
			hdev->decoder_binning);
		return -EINVAL;
	}

	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);

	if (prop->decoder_binning_mask)
		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
	else
		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;

	return 0;
}

static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* check if we should override default binning */
	if (!hdev->dram_binning) {
		prop->dram_binning_mask = 0;
		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
		return;
	}

	/* set DRAM binning constraints */
	prop->faulty_dram_cluster_map |= hdev->dram_binning;
	prop->dram_binning_mask = hdev->dram_binning;
	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
}

static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u8 seq, num_faulty;

	num_faulty = hweight32(hdev->edma_binning);

	/*
	 * check for error condition in which number of binning candidates
	 * is higher than the maximum supported by the driver
	 */
	if (num_faulty > MAX_FAULTY_EDMAS) {
		dev_err(hdev->dev,
			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
			hdev->edma_binning);
		return -EINVAL;
	}

	if (!hdev->edma_binning) {
		prop->edma_binning_mask = 0;
		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
		return 0;
	}

	seq = __ffs((unsigned long)hdev->edma_binning);

	/* set binning constraints */
	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
	prop->edma_binning_mask = hdev->edma_binning;
	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);

	/* bin the substitute EDMA's queues */
	q_props = prop->hw_queues_props;
	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;

	return 0;
}

static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 num_faulty, seq;

	/* check if we should override default binning */
	if (!xbar_edge_iso_mask) {
		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
		return 0;
	}

	/*
	 * Note that it can be set to a value other than 0 only after the cpucp packet
	 * (i.e. only the FW can set a redundancy value). For the user it will always be 0.
	 */
	num_faulty = hweight32(xbar_edge_iso_mask);

	/*
	 * check for error condition in which number of binning candidates
	 * is higher than the maximum supported by the driver
	 */
	if (num_faulty > MAX_FAULTY_XBARS) {
		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
			MAX_FAULTY_XBARS);
		return -EINVAL;
	}

	seq = __ffs((unsigned long)xbar_edge_iso_mask);

	/* set binning constraints */
	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;

	return 0;
}

static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
{
	int rc;

	/*
	 * mark all clusters as good; each component will "fail" a cluster
	 * based on eFuse/user values.
	 * If more than a single cluster is faulty, the chip is unusable.
	 */
	hdev->asic_prop.faulty_dram_cluster_map = 0;

	gaudi2_set_dram_binning_masks(hdev);

	rc = gaudi2_set_edma_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
	if (rc)
		return rc;

	/* always initially set to full mask */
	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;

	return 0;
}

static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
	if (rc)
		return rc;

	/* if we have DRAM binning reported by FW we should perform cluster config */
	if (prop->faulty_dram_cluster_map) {
		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);

		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
	}

	return 0;
}

static int gaudi2_set_binning_masks(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_set_cluster_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_tpc_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_dec_binning_masks(hdev);
	if (rc)
		return rc;

	return 0;
}

static int gaudi2_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	long max_power;
	u64 dram_size;
	int rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	/* No point in asking for this information again when not doing a hard reset,
	 * as the device CPU hasn't been reset
	 */
	if (hdev->reset_info.in_compute_reset)
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		/* we can have either 5 or 6 HBMs.
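		 * i.e. dram_size should be either 5 * 16 GB (one HBM binned out) or
		 * 6 * 16 GB for a fully populated device;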
other values are invalid */ 2483 2484 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2485 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2486 dev_err(hdev->dev, 2487 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n", 2488 dram_size, prop->dram_size); 2489 dram_size = prop->dram_size; 2490 } 2491 2492 prop->dram_size = dram_size; 2493 prop->dram_end_address = prop->dram_base_address + dram_size; 2494 } 2495 2496 if (!strlen(prop->cpucp_info.card_name)) 2497 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2498 2499 /* Overwrite binning masks with the actual binning values from F/W */ 2500 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2501 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2502 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2503 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2504 2505 /* 2506 * at this point the DRAM parameters need to be updated according to data obtained 2507 * from the FW 2508 */ 2509 rc = hdev->asic_funcs->set_dram_properties(hdev); 2510 if (rc) 2511 return rc; 2512 2513 rc = hdev->asic_funcs->set_binning_masks(hdev); 2514 if (rc) 2515 return rc; 2516 2517 max_power = hl_fw_get_max_power(hdev); 2518 if (max_power < 0) 2519 return max_power; 2520 2521 prop->max_power_default = (u64) max_power; 2522 2523 return 0; 2524 } 2525 2526 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2527 { 2528 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2529 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2530 int rc; 2531 2532 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2533 return 0; 2534 2535 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2536 if (rc) 2537 return rc; 2538 2539 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2540 2541 return 0; 2542 } 2543 2544 static int gaudi2_early_init(struct hl_device *hdev) 2545 { 2546 struct asic_fixed_properties *prop = &hdev->asic_prop; 2547 struct pci_dev *pdev = hdev->pdev; 2548 resource_size_t pci_bar_size; 2549 int rc; 2550 2551 rc = gaudi2_set_fixed_properties(hdev); 2552 if (rc) 2553 return rc; 2554 2555 /* Check BAR sizes */ 2556 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2557 2558 if (pci_bar_size != CFG_BAR_SIZE) { 2559 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2560 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2561 rc = -ENODEV; 2562 goto free_queue_props; 2563 } 2564 2565 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2566 if (pci_bar_size != MSIX_BAR_SIZE) { 2567 dev_err(hdev->dev, "Not " HL_NAME "? 
BAR %d size %pa, expecting %llu\n", 2568 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2569 rc = -ENODEV; 2570 goto free_queue_props; 2571 } 2572 2573 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2574 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2575 2576 /* 2577 * Only in pldm driver config iATU 2578 */ 2579 if (hdev->pldm) 2580 hdev->asic_prop.iatu_done_by_fw = false; 2581 else 2582 hdev->asic_prop.iatu_done_by_fw = true; 2583 2584 rc = hl_pci_init(hdev); 2585 if (rc) 2586 goto free_queue_props; 2587 2588 /* Before continuing in the initialization, we need to read the preboot 2589 * version to determine whether we run with a security-enabled firmware 2590 */ 2591 rc = hl_fw_read_preboot_status(hdev); 2592 if (rc) { 2593 if (hdev->reset_on_preboot_fail) 2594 hdev->asic_funcs->hw_fini(hdev, true, false); 2595 goto pci_fini; 2596 } 2597 2598 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2599 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2600 hdev->asic_funcs->hw_fini(hdev, true, false); 2601 } 2602 2603 return 0; 2604 2605 pci_fini: 2606 hl_pci_fini(hdev); 2607 free_queue_props: 2608 kfree(hdev->asic_prop.hw_queues_props); 2609 return rc; 2610 } 2611 2612 static int gaudi2_early_fini(struct hl_device *hdev) 2613 { 2614 kfree(hdev->asic_prop.hw_queues_props); 2615 hl_pci_fini(hdev); 2616 2617 return 0; 2618 } 2619 2620 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 2621 { 2622 switch (arc_id) { 2623 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 2624 return true; 2625 default: 2626 return false; 2627 } 2628 } 2629 2630 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 2631 { 2632 switch (arc_id) { 2633 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 2634 return true; 2635 default: 2636 return false; 2637 } 2638 } 2639 2640 static void gaudi2_init_arcs(struct hl_device *hdev) 2641 { 2642 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2643 u64 arc_id; 2644 u32 i; 2645 2646 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 2647 if (gaudi2_is_arc_enabled(hdev, i)) 2648 continue; 2649 2650 gaudi2_set_arc_id_cap(hdev, i); 2651 } 2652 2653 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 2654 if (!gaudi2_is_queue_enabled(hdev, i)) 2655 continue; 2656 2657 arc_id = gaudi2_queue_id_to_arc_id[i]; 2658 if (gaudi2_is_arc_enabled(hdev, arc_id)) 2659 continue; 2660 2661 if (gaudi2_is_arc_nic_owned(arc_id) && 2662 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 2663 continue; 2664 2665 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 2666 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 2667 continue; 2668 2669 gaudi2_set_arc_id_cap(hdev, arc_id); 2670 } 2671 } 2672 2673 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 2674 { 2675 u32 reg_base, reg_val; 2676 int rc; 2677 2678 switch (cpu_id) { 2679 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 2680 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 2681 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2682 ARC_DCCM_BLOCK_SIZE * 2, true); 2683 if (rc) 2684 return rc; 2685 break; 2686 case CPU_ID_SCHED_ARC4: 2687 case CPU_ID_SCHED_ARC5: 2688 case CPU_ID_MME_QMAN_ARC0: 2689 case CPU_ID_MME_QMAN_ARC1: 2690 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 2691 2692 /* Scrub lower DCCM block */ 2693 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2694 ARC_DCCM_BLOCK_SIZE, true); 2695 if (rc) 2696 return rc; 2697 2698 /* Switch to upper DCCM block */ 2699 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 2700 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2701 2702 /* Scrub upper DCCM block */ 2703 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2704 ARC_DCCM_BLOCK_SIZE, true); 2705 if (rc) 2706 return rc; 2707 2708 /* Switch to lower DCCM block */ 2709 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 2710 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 2711 break; 2712 default: 2713 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 2714 ARC_DCCM_BLOCK_SIZE, true); 2715 if (rc) 2716 return rc; 2717 } 2718 2719 return 0; 2720 } 2721 2722 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 2723 { 2724 u16 arc_id; 2725 2726 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 2727 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 2728 continue; 2729 2730 gaudi2_scrub_arc_dccm(hdev, arc_id); 2731 } 2732 } 2733 2734 static int gaudi2_late_init(struct hl_device *hdev) 2735 { 2736 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2737 int rc; 2738 2739 hdev->asic_prop.supports_advanced_cpucp_rc = true; 2740 2741 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 2742 gaudi2->virt_msix_db_dma_addr); 2743 if (rc) { 2744 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 2745 return rc; 2746 } 2747 2748 rc = gaudi2_fetch_psoc_frequency(hdev); 2749 if (rc) { 2750 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 2751 goto disable_pci_access; 2752 } 2753 2754 gaudi2_init_arcs(hdev); 2755 gaudi2_scrub_arcs_dccm(hdev); 2756 gaudi2_init_security(hdev); 2757 2758 return 0; 2759 2760 disable_pci_access: 2761 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 2762 2763 return rc; 2764 } 2765 2766 static void gaudi2_late_fini(struct hl_device *hdev) 2767 { 2768 hl_hwmon_release_resources(hdev); 2769 } 2770 2771 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 2772 { 2773 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2774 2775 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2776 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2777 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2778 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2779 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2780 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2781 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2782 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2783 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 2784 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 2785 } 2786 2787 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 2788 { 2789 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2790 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 2791 u32 block_size, umr_start_idx, num_umr_blocks; 2792 int i; 2793 2794 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 2795 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 2796 block_size = ARC_DCCM_BLOCK_SIZE * 2; 2797 else 2798 block_size = ARC_DCCM_BLOCK_SIZE; 2799 2800 blocks[i].address = gaudi2_arc_dccm_bases[i]; 2801 blocks[i].size = block_size; 2802 } 2803 2804 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 2805 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 2806 2807 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 2808 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 2809 2810 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 2811 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 2812 2813 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 2814 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 2815 2816 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 2817 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 2818 2819 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 2820 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 2821 2822 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 2823 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 2824 2825 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 2826 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 2827 2828 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 2829 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 2830 for (i = 0 ; i < num_umr_blocks ; i++) { 2831 u8 nic_id, umr_block_id; 2832 2833 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 2834 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 2835 2836 blocks[umr_start_idx + i].address = 2837 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 2838 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 2839 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 2840 umr_block_id * NIC_UMR_OFFSET; 2841 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 2842 } 2843 2844 /* Expose decoder HW configuration block to user */ 2845 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 2846 2847 for (i = 1; i < NUM_OF_DCORES; ++i) { 2848 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 2849 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 2850 2851 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 2852 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 2853 2854 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 2855 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 2856 } 2857 } 2858 2859 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 2860 { 2861 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 2862 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 2863 int i, j, rc = 0; 2864 2865 /* The device ARC works with 32-bits addresses, and because there is a single HW register 2866 * that holds the extension bits (49..28), these bits must be identical in all the allocated 2867 * range. 
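	 *
	 * The loop below therefore tries up to GAUDI2_ALLOC_CPU_MEM_RETRY_CNT candidate
	 * buffers and keeps the first one whose start and end addresses share the same
	 * MSB value as extracted by GAUDI2_ARC_PCI_MSB_ADDR(); candidates that are not
	 * chosen are freed under the free_dma_mem_arr label. Purely as an illustration:
	 * with extension bits 49..28, a buffer that crosses a 256 MB aligned boundary
	 * has different MSB values at its start and end and is therefore rejected.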
2868 */ 2869 2870 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 2871 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 2872 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO); 2873 if (!virt_addr_arr[i]) { 2874 rc = -ENOMEM; 2875 goto free_dma_mem_arr; 2876 } 2877 2878 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 2879 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr)) 2880 break; 2881 } 2882 2883 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) { 2884 dev_err(hdev->dev, 2885 "MSB of ARC accessible DMA memory are not identical in all range\n"); 2886 rc = -EFAULT; 2887 goto free_dma_mem_arr; 2888 } 2889 2890 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 2891 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 2892 2893 free_dma_mem_arr: 2894 for (j = 0 ; j < i ; j++) 2895 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 2896 dma_addr_arr[j]); 2897 2898 return rc; 2899 } 2900 2901 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev) 2902 { 2903 struct asic_fixed_properties *prop = &hdev->asic_prop; 2904 struct pci_mem_region *region; 2905 2906 /* CFG */ 2907 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 2908 region->region_base = CFG_BASE; 2909 region->region_size = CFG_SIZE; 2910 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR; 2911 region->bar_size = CFG_BAR_SIZE; 2912 region->bar_id = SRAM_CFG_BAR_ID; 2913 region->used = 1; 2914 2915 /* SRAM */ 2916 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 2917 region->region_base = SRAM_BASE_ADDR; 2918 region->region_size = SRAM_SIZE; 2919 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE; 2920 region->bar_size = CFG_BAR_SIZE; 2921 region->bar_id = SRAM_CFG_BAR_ID; 2922 region->used = 1; 2923 2924 /* DRAM */ 2925 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 2926 region->region_base = DRAM_PHYS_BASE; 2927 region->region_size = hdev->asic_prop.dram_size; 2928 region->offset_in_bar = 0; 2929 region->bar_size = prop->dram_pci_bar_size; 2930 region->bar_id = DRAM_BAR_ID; 2931 region->used = 1; 2932 } 2933 2934 static void gaudi2_user_interrupt_setup(struct hl_device *hdev) 2935 { 2936 struct asic_fixed_properties *prop = &hdev->asic_prop; 2937 int i, j, k; 2938 2939 /* Initialize common user CQ interrupt */ 2940 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, 2941 HL_COMMON_USER_CQ_INTERRUPT_ID, false); 2942 2943 /* Initialize common decoder interrupt */ 2944 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, 2945 HL_COMMON_DEC_INTERRUPT_ID, true); 2946 2947 /* User interrupts structure holds both decoder and user interrupts from various engines. 2948 * We first initialize the decoder interrupts and then we add the user interrupts. 2949 * The only limitation is that the last decoder interrupt id must be smaller 2950 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time. 
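	 *
	 * A compile-time check along these lines captures that requirement (illustrative
	 * only; the actual assertion in the driver may be phrased differently):
	 *   static_assert(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM < GAUDI2_IRQ_NUM_USER_FIRST);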
2951 */ 2952 2953 /* Initialize decoder interrupts, expose only normal interrupts, 2954 * error interrupts to be handled by driver 2955 */ 2956 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 2957 i += 2, j++) 2958 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true); 2959 2960 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 2961 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false); 2962 } 2963 2964 static inline int gaudi2_get_non_zero_random_int(void) 2965 { 2966 int rand = get_random_u32(); 2967 2968 return rand ? rand : 1; 2969 } 2970 2971 static int gaudi2_sw_init(struct hl_device *hdev) 2972 { 2973 struct asic_fixed_properties *prop = &hdev->asic_prop; 2974 struct gaudi2_device *gaudi2; 2975 int i, rc; 2976 2977 /* Allocate device structure */ 2978 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 2979 if (!gaudi2) 2980 return -ENOMEM; 2981 2982 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 2983 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 2984 continue; 2985 2986 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 2987 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 2988 GAUDI2_EVENT_SIZE); 2989 rc = -EINVAL; 2990 goto free_gaudi2_device; 2991 } 2992 2993 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 2994 } 2995 2996 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 2997 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 2998 2999 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 3000 3001 hdev->asic_specific = gaudi2; 3002 3003 /* Create DMA pool for small allocations. 3004 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 3005 * PI/CI registers allocated from this pool have this restriction 3006 */ 3007 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 3008 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 3009 if (!hdev->dma_pool) { 3010 dev_err(hdev->dev, "failed to create DMA pool\n"); 3011 rc = -ENOMEM; 3012 goto free_gaudi2_device; 3013 } 3014 3015 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3016 if (rc) 3017 goto free_dma_pool; 3018 3019 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3020 if (!hdev->cpu_accessible_dma_pool) { 3021 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3022 rc = -ENOMEM; 3023 goto free_cpu_dma_mem; 3024 } 3025 3026 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3027 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3028 if (rc) { 3029 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3030 rc = -EFAULT; 3031 goto free_cpu_accessible_dma_pool; 3032 } 3033 3034 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3035 &gaudi2->virt_msix_db_dma_addr); 3036 if (!gaudi2->virt_msix_db_cpu_addr) { 3037 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3038 rc = -ENOMEM; 3039 goto free_cpu_accessible_dma_pool; 3040 } 3041 3042 spin_lock_init(&gaudi2->hw_queues_lock); 3043 3044 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3045 &gaudi2->scratchpad_bus_address, 3046 GFP_KERNEL | __GFP_ZERO); 3047 if (!gaudi2->scratchpad_kernel_address) { 3048 rc = -ENOMEM; 3049 goto free_virt_msix_db_mem; 3050 } 3051 3052 gaudi2_user_mapped_blocks_init(hdev); 3053 3054 /* Initialize user interrupts */ 3055 
gaudi2_user_interrupt_setup(hdev); 3056 3057 hdev->supports_coresight = true; 3058 hdev->supports_sync_stream = true; 3059 hdev->supports_cb_mapping = true; 3060 hdev->supports_wait_for_multi_cs = false; 3061 3062 prop->supports_compute_reset = true; 3063 3064 hdev->asic_funcs->set_pci_memory_regions(hdev); 3065 3066 return 0; 3067 3068 free_virt_msix_db_mem: 3069 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3070 free_cpu_accessible_dma_pool: 3071 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3072 free_cpu_dma_mem: 3073 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3074 hdev->cpu_accessible_dma_address); 3075 free_dma_pool: 3076 dma_pool_destroy(hdev->dma_pool); 3077 free_gaudi2_device: 3078 kfree(gaudi2); 3079 return rc; 3080 } 3081 3082 static int gaudi2_sw_fini(struct hl_device *hdev) 3083 { 3084 struct asic_fixed_properties *prop = &hdev->asic_prop; 3085 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3086 3087 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3088 3089 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3090 3091 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3092 hdev->cpu_accessible_dma_address); 3093 3094 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3095 gaudi2->scratchpad_bus_address); 3096 3097 dma_pool_destroy(hdev->dma_pool); 3098 3099 kfree(gaudi2); 3100 3101 return 0; 3102 } 3103 3104 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base) 3105 { 3106 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP | 3107 QM_GLBL_CFG1_CQF_STOP | 3108 QM_GLBL_CFG1_CP_STOP); 3109 3110 /* stop also the ARC */ 3111 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP); 3112 } 3113 3114 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base) 3115 { 3116 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH | 3117 QM_GLBL_CFG1_CQF_FLUSH | 3118 QM_GLBL_CFG1_CP_FLUSH); 3119 } 3120 3121 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base) 3122 { 3123 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH); 3124 } 3125 3126 /** 3127 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters 3128 * 3129 * @hdev: pointer to the habanalabs device structure 3130 * @queue_id: queue to clear fence counters to 3131 * @skip_fence: if true set maximum fence value to all fence counters to avoid 3132 * getting stuck on any fence value. otherwise set all fence 3133 * counters to 0 (standard clear of fence counters) 3134 */ 3135 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id, 3136 bool skip_fence) 3137 { 3138 u32 size, reg_base; 3139 u32 addr, val; 3140 3141 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3142 3143 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET; 3144 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0; 3145 3146 /* 3147 * in case we want to make sure that QM that is stuck on a fence will 3148 * be released we should set the fence counter to a higher value that 3149 * the value the QM waiting for. to comply with any fence counter of 3150 * any value we set maximum fence value to all counters 3151 */ 3152 val = skip_fence ? 
U32_MAX : 0; 3153 gaudi2_memset_device_lbw(hdev, addr, size, val); 3154 } 3155 3156 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3157 { 3158 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3159 3160 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3161 gaudi2_flush_qman_common(hdev, reg_base); 3162 gaudi2_flush_qman_arc_common(hdev, reg_base); 3163 } 3164 3165 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3166 { 3167 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3168 int dcore, inst; 3169 3170 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3171 goto stop_edma_qmans; 3172 3173 /* Stop CPs of PDMA QMANs */ 3174 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3175 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3176 3177 stop_edma_qmans: 3178 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3179 return; 3180 3181 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3182 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3183 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3184 u32 qm_base; 3185 3186 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3187 continue; 3188 3189 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3190 inst * DCORE_EDMA_OFFSET; 3191 3192 /* Stop CPs of EDMA QMANs */ 3193 gaudi2_stop_qman_common(hdev, qm_base); 3194 } 3195 } 3196 } 3197 3198 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3199 { 3200 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3201 u32 offset, i; 3202 3203 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3204 3205 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3206 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3207 continue; 3208 3209 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3210 } 3211 } 3212 3213 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3214 { 3215 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3216 u32 reg_base; 3217 int i; 3218 3219 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3220 return; 3221 3222 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3223 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3224 continue; 3225 3226 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3227 gaudi2_stop_qman_common(hdev, reg_base); 3228 } 3229 } 3230 3231 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3232 { 3233 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3234 u32 reg_base; 3235 int i; 3236 3237 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3238 return; 3239 3240 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3241 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3242 continue; 3243 3244 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3245 gaudi2_stop_qman_common(hdev, reg_base); 3246 } 3247 } 3248 3249 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3250 { 3251 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3252 u32 reg_base, queue_id; 3253 int i; 3254 3255 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3256 return; 3257 3258 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3259 3260 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3261 if (!(hdev->nic_ports_mask & BIT(i))) 3262 continue; 3263 3264 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3265 gaudi2_stop_qman_common(hdev, reg_base); 3266 } 3267 } 3268 3269 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3270 { 3271 u32 reg_val; 3272 3273 reg_val = 
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3274 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3275 } 3276 3277 static void gaudi2_dma_stall(struct hl_device *hdev) 3278 { 3279 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3280 int dcore, inst; 3281 3282 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3283 goto stall_edma; 3284 3285 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3286 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3287 3288 stall_edma: 3289 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3290 return; 3291 3292 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3293 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3294 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3295 u32 core_base; 3296 3297 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3298 continue; 3299 3300 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3301 inst * DCORE_EDMA_OFFSET; 3302 3303 /* Stall CPs of EDMA QMANs */ 3304 gaudi2_stall_dma_common(hdev, core_base); 3305 } 3306 } 3307 } 3308 3309 static void gaudi2_mme_stall(struct hl_device *hdev) 3310 { 3311 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3312 u32 offset, i; 3313 3314 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3315 3316 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3317 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3318 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3319 } 3320 3321 static void gaudi2_tpc_stall(struct hl_device *hdev) 3322 { 3323 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3324 u32 reg_base; 3325 int i; 3326 3327 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3328 return; 3329 3330 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3331 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3332 continue; 3333 3334 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3335 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3336 } 3337 } 3338 3339 static void gaudi2_rotator_stall(struct hl_device *hdev) 3340 { 3341 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3342 u32 reg_val; 3343 int i; 3344 3345 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3346 return; 3347 3348 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3349 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3350 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3351 3352 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3353 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3354 continue; 3355 3356 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3357 } 3358 } 3359 3360 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3361 { 3362 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3363 } 3364 3365 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3366 { 3367 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3368 int dcore, inst; 3369 3370 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3371 goto stop_edma_qmans; 3372 3373 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3374 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3375 3376 stop_edma_qmans: 3377 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3378 return; 3379 3380 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3381 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3382 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3383 u32 qm_base; 3384 3385 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3386 continue; 3387 3388 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3389 inst * 
DCORE_EDMA_OFFSET; 3390 3391 /* Disable CPs of EDMA QMANs */ 3392 gaudi2_disable_qman_common(hdev, qm_base); 3393 } 3394 } 3395 } 3396 3397 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3398 { 3399 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3400 u32 offset, i; 3401 3402 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3403 3404 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3405 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3406 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3407 } 3408 3409 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3410 { 3411 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3412 u32 reg_base; 3413 int i; 3414 3415 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3416 return; 3417 3418 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3419 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3420 continue; 3421 3422 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3423 gaudi2_disable_qman_common(hdev, reg_base); 3424 } 3425 } 3426 3427 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 3428 { 3429 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3430 u32 reg_base; 3431 int i; 3432 3433 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3434 return; 3435 3436 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3437 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3438 continue; 3439 3440 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3441 gaudi2_disable_qman_common(hdev, reg_base); 3442 } 3443 } 3444 3445 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 3446 { 3447 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3448 u32 reg_base, queue_id; 3449 int i; 3450 3451 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3452 return; 3453 3454 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3455 3456 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3457 if (!(hdev->nic_ports_mask & BIT(i))) 3458 continue; 3459 3460 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3461 gaudi2_disable_qman_common(hdev, reg_base); 3462 } 3463 } 3464 3465 static void gaudi2_enable_timestamp(struct hl_device *hdev) 3466 { 3467 /* Disable the timestamp counter */ 3468 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3469 3470 /* Zero the lower/upper parts of the 64-bit counter */ 3471 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 3472 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 3473 3474 /* Enable the counter */ 3475 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 3476 } 3477 3478 static void gaudi2_disable_timestamp(struct hl_device *hdev) 3479 { 3480 /* Disable the timestamp counter */ 3481 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3482 } 3483 3484 static const char *gaudi2_irq_name(u16 irq_number) 3485 { 3486 switch (irq_number) { 3487 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 3488 return "gaudi2 cpu eq"; 3489 case GAUDI2_IRQ_NUM_COMPLETION: 3490 return "gaudi2 completion"; 3491 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 3492 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 3493 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 3494 return "gaudi2 user completion"; 3495 default: 3496 return "invalid"; 3497 } 3498 } 3499 3500 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 3501 { 3502 int i, irq, relative_idx; 3503 struct hl_dec *dec; 3504 3505 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 3506 irq = pci_irq_vector(hdev->pdev, i); 3507 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3508 3509 dec = hdev->dec + relative_idx / 2; 3510 3511 /* We pass different structures depending on the irq handler. For the abnormal 3512 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3513 * user_interrupt entry 3514 */ 3515 free_irq(irq, ((relative_idx % 2) ? 3516 (void *) dec : 3517 (void *) &hdev->user_interrupt[dec->core_id])); 3518 } 3519 } 3520 3521 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 3522 { 3523 int rc, i, irq_init_cnt, irq, relative_idx; 3524 irq_handler_t irq_handler; 3525 struct hl_dec *dec; 3526 3527 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 3528 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 3529 i++, irq_init_cnt++) { 3530 3531 irq = pci_irq_vector(hdev->pdev, i); 3532 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 3533 3534 irq_handler = (relative_idx % 2) ? 3535 hl_irq_handler_dec_abnrm : 3536 hl_irq_handler_user_interrupt; 3537 3538 dec = hdev->dec + relative_idx / 2; 3539 3540 /* We pass different structures depending on the irq handler. For the abnormal 3541 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 3542 * user_interrupt entry 3543 */ 3544 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), 3545 ((relative_idx % 2) ? 3546 (void *) dec : 3547 (void *) &hdev->user_interrupt[dec->core_id])); 3548 if (rc) { 3549 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3550 goto free_dec_irqs; 3551 } 3552 } 3553 3554 return 0; 3555 3556 free_dec_irqs: 3557 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 3558 return rc; 3559 } 3560 3561 static int gaudi2_enable_msix(struct hl_device *hdev) 3562 { 3563 struct asic_fixed_properties *prop = &hdev->asic_prop; 3564 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3565 int rc, irq, i, j, user_irq_init_cnt; 3566 irq_handler_t irq_handler; 3567 struct hl_cq *cq; 3568 3569 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 3570 return 0; 3571 3572 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 3573 PCI_IRQ_MSIX); 3574 if (rc < 0) { 3575 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 3576 GAUDI2_MSIX_ENTRIES, rc); 3577 return rc; 3578 } 3579 3580 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3581 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3582 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 3583 if (rc) { 3584 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3585 goto free_irq_vectors; 3586 } 3587 3588 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3589 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 3590 &hdev->event_queue); 3591 if (rc) { 3592 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3593 goto free_completion_irq; 3594 } 3595 3596 rc = gaudi2_dec_enable_msix(hdev); 3597 if (rc) { 3598 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 3599 goto free_event_irq; 3600 } 3601 3602 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 
3603 user_irq_init_cnt < prop->user_interrupt_count; 3604 i++, j++, user_irq_init_cnt++) { 3605 3606 irq = pci_irq_vector(hdev->pdev, i); 3607 irq_handler = hl_irq_handler_user_interrupt; 3608 3609 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); 3610 if (rc) { 3611 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 3612 goto free_user_irq; 3613 } 3614 } 3615 3616 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 3617 3618 return 0; 3619 3620 free_user_irq: 3621 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 3622 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 3623 3624 irq = pci_irq_vector(hdev->pdev, i); 3625 free_irq(irq, &hdev->user_interrupt[j]); 3626 } 3627 3628 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3629 3630 free_event_irq: 3631 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3632 free_irq(irq, cq); 3633 3634 free_completion_irq: 3635 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3636 free_irq(irq, cq); 3637 3638 free_irq_vectors: 3639 pci_free_irq_vectors(hdev->pdev); 3640 3641 return rc; 3642 } 3643 3644 static void gaudi2_sync_irqs(struct hl_device *hdev) 3645 { 3646 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3647 int i, j; 3648 int irq; 3649 3650 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3651 return; 3652 3653 /* Wait for all pending IRQs to be finished */ 3654 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 3655 3656 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 3657 irq = pci_irq_vector(hdev->pdev, i); 3658 synchronize_irq(irq); 3659 } 3660 3661 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 3662 i++, j++) { 3663 irq = pci_irq_vector(hdev->pdev, i); 3664 synchronize_irq(irq); 3665 } 3666 3667 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 3668 } 3669 3670 static void gaudi2_disable_msix(struct hl_device *hdev) 3671 { 3672 struct asic_fixed_properties *prop = &hdev->asic_prop; 3673 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3674 struct hl_cq *cq; 3675 int irq, i, j, k; 3676 3677 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 3678 return; 3679 3680 gaudi2_sync_irqs(hdev); 3681 3682 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3683 free_irq(irq, &hdev->event_queue); 3684 3685 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3686 3687 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 3688 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 3689 3690 irq = pci_irq_vector(hdev->pdev, i); 3691 free_irq(irq, &hdev->user_interrupt[j]); 3692 } 3693 3694 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 3695 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 3696 free_irq(irq, cq); 3697 3698 pci_free_irq_vectors(hdev->pdev); 3699 3700 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 3701 } 3702 3703 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 3704 { 3705 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3706 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3707 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3708 int rc; 3709 3710 if (hdev->pldm) 3711 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3712 else 3713 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3714 3715 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3716 dec_bit = dcore_id 
* NUM_OF_DEC_PER_DCORE + dec_id; 3717 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3718 continue; 3719 3720 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; 3721 3722 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0); 3723 3724 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3725 3726 /* Wait till all traffic from decoder stops 3727 * before apply core reset. 3728 */ 3729 rc = hl_poll_timeout( 3730 hdev, 3731 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3732 graceful, 3733 (graceful & graceful_pend_mask), 3734 100, 3735 timeout_usec); 3736 if (rc) 3737 dev_err(hdev->dev, 3738 "Failed to stop traffic from DCORE%d Decoder %d\n", 3739 dcore_id, dec_id); 3740 } 3741 } 3742 3743 static void gaudi2_stop_pcie_dec(struct hl_device *hdev) 3744 { 3745 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 3746 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 3747 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 3748 int rc; 3749 3750 if (hdev->pldm) 3751 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 3752 else 3753 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 3754 3755 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 3756 dec_bit = PCIE_DEC_SHIFT + dec_id; 3757 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 3758 continue; 3759 3760 offset = dec_id * PCIE_VDEC_OFFSET; 3761 3762 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0); 3763 3764 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 3765 3766 /* Wait till all traffic from decoder stops 3767 * before apply core reset. 3768 */ 3769 rc = hl_poll_timeout( 3770 hdev, 3771 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, 3772 graceful, 3773 (graceful & graceful_pend_mask), 3774 100, 3775 timeout_usec); 3776 if (rc) 3777 dev_err(hdev->dev, 3778 "Failed to stop traffic from PCIe Decoder %d\n", 3779 dec_id); 3780 } 3781 } 3782 3783 static void gaudi2_stop_dec(struct hl_device *hdev) 3784 { 3785 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3786 int dcore_id; 3787 3788 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0) 3789 return; 3790 3791 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 3792 gaudi2_stop_dcore_dec(hdev, dcore_id); 3793 3794 gaudi2_stop_pcie_dec(hdev); 3795 } 3796 3797 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3798 { 3799 u32 reg_base, reg_val; 3800 3801 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3802 if (run_mode == HL_ENGINE_CORE_RUN) 3803 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 3804 else 3805 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 3806 3807 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 3808 } 3809 3810 static void gaudi2_halt_arcs(struct hl_device *hdev) 3811 { 3812 u16 arc_id; 3813 3814 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 3815 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3816 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 3817 } 3818 } 3819 3820 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3821 { 3822 int rc; 3823 u32 reg_base, val, ack_mask, timeout_usec = 100000; 3824 3825 if (hdev->pldm) 3826 timeout_usec *= 100; 3827 3828 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3829 if (run_mode == HL_ENGINE_CORE_RUN) 3830 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 3831 else 3832 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 3833 3834 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 3835 val, 
((val & ack_mask) == ack_mask), 3836 1000, timeout_usec); 3837 3838 if (!rc) { 3839 /* Clear */ 3840 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 3841 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 3842 } 3843 3844 return rc; 3845 } 3846 3847 static void gaudi2_reset_arcs(struct hl_device *hdev) 3848 { 3849 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3850 u16 arc_id; 3851 3852 if (!gaudi2) 3853 return; 3854 3855 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) 3856 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3857 gaudi2_clr_arc_id_cap(hdev, arc_id); 3858 } 3859 3860 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) 3861 { 3862 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3863 u32 queue_id; 3864 int i; 3865 3866 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3867 return; 3868 3869 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3870 3871 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3872 if (!(hdev->nic_ports_mask & BIT(i))) 3873 continue; 3874 3875 gaudi2_qman_manual_flush_common(hdev, queue_id); 3876 } 3877 } 3878 3879 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 3880 u32 num_cores, u32 core_command) 3881 { 3882 int i, rc; 3883 3884 3885 for (i = 0 ; i < num_cores ; i++) { 3886 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 3887 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 3888 } 3889 3890 for (i = 0 ; i < num_cores ; i++) { 3891 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 3892 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); 3893 3894 if (rc) { 3895 dev_err(hdev->dev, "failed to %s arc: %d\n", 3896 (core_command == HL_ENGINE_CORE_HALT) ? 3897 "HALT" : "RUN", core_ids[i]); 3898 return -1; 3899 } 3900 } 3901 } 3902 3903 return 0; 3904 } 3905 3906 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3907 { 3908 u32 wait_timeout_ms; 3909 3910 if (hdev->pldm) 3911 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC; 3912 else 3913 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; 3914 3915 if (fw_reset) 3916 goto skip_engines; 3917 3918 gaudi2_stop_dma_qmans(hdev); 3919 gaudi2_stop_mme_qmans(hdev); 3920 gaudi2_stop_tpc_qmans(hdev); 3921 gaudi2_stop_rot_qmans(hdev); 3922 gaudi2_stop_nic_qmans(hdev); 3923 msleep(wait_timeout_ms); 3924 3925 gaudi2_halt_arcs(hdev); 3926 gaudi2_dma_stall(hdev); 3927 gaudi2_mme_stall(hdev); 3928 gaudi2_tpc_stall(hdev); 3929 gaudi2_rotator_stall(hdev); 3930 3931 msleep(wait_timeout_ms); 3932 3933 gaudi2_stop_dec(hdev); 3934 3935 /* 3936 * in case of soft reset do a manual flush for QMANs (currently called 3937 * only for NIC QMANs 3938 */ 3939 if (!hard_reset) 3940 gaudi2_nic_qmans_manual_flush(hdev); 3941 3942 gaudi2_disable_dma_qmans(hdev); 3943 gaudi2_disable_mme_qmans(hdev); 3944 gaudi2_disable_tpc_qmans(hdev); 3945 gaudi2_disable_rot_qmans(hdev); 3946 gaudi2_disable_nic_qmans(hdev); 3947 gaudi2_disable_timestamp(hdev); 3948 3949 skip_engines: 3950 if (hard_reset) { 3951 gaudi2_disable_msix(hdev); 3952 return; 3953 } 3954 3955 gaudi2_sync_irqs(hdev); 3956 } 3957 3958 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) 3959 { 3960 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3961 3962 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3963 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3964 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3965 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3966 
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3967 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; 3968 } 3969 3970 static void gaudi2_init_firmware_loader(struct hl_device *hdev) 3971 { 3972 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3973 struct dynamic_fw_load_mgr *dynamic_loader; 3974 struct cpu_dyn_regs *dyn_regs; 3975 3976 /* fill common fields */ 3977 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3978 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE; 3979 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE; 3980 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC; 3981 fw_loader->skip_bmc = false; 3982 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; 3983 fw_loader->dram_bar_id = DRAM_BAR_ID; 3984 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; 3985 3986 /* here we update initial values for few specific dynamic regs (as 3987 * before reading the first descriptor from FW those value has to be 3988 * hard-coded). in later stages of the protocol those values will be 3989 * updated automatically by reading the FW descriptor so data there 3990 * will always be up-to-date 3991 */ 3992 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3993 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3994 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3995 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3996 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC; 3997 } 3998 3999 static int gaudi2_init_cpu(struct hl_device *hdev) 4000 { 4001 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4002 int rc; 4003 4004 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 4005 return 0; 4006 4007 if (gaudi2->hw_cap_initialized & HW_CAP_CPU) 4008 return 0; 4009 4010 rc = hl_fw_init_cpu(hdev); 4011 if (rc) 4012 return rc; 4013 4014 gaudi2->hw_cap_initialized |= HW_CAP_CPU; 4015 4016 return 0; 4017 } 4018 4019 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 4020 { 4021 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 4022 struct asic_fixed_properties *prop = &hdev->asic_prop; 4023 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4024 struct cpu_dyn_regs *dyn_regs; 4025 struct hl_eq *eq; 4026 u32 status; 4027 int err; 4028 4029 if (!hdev->cpu_queues_enable) 4030 return 0; 4031 4032 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 4033 return 0; 4034 4035 eq = &hdev->event_queue; 4036 4037 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4038 4039 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 4040 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 4041 4042 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 4043 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 4044 4045 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address)); 4046 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address)); 4047 4048 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 4049 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 4050 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 4051 4052 /* Used for EQ CI */ 4053 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 4054 4055 WREG32(mmCPU_IF_PF_PQ_PI, 0); 4056 4057 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 4058 4059 /* Let the ARC know we are ready as it is now handling those queues */ 4060 4061 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 4062 
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 4063 4064 err = hl_poll_timeout( 4065 hdev, 4066 mmCPU_IF_QUEUE_INIT, 4067 status, 4068 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4069 1000, 4070 cpu_timeout); 4071 4072 if (err) { 4073 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n"); 4074 return -EIO; 4075 } 4076 4077 /* update FW application security bits */ 4078 if (prop->fw_cpu_boot_dev_sts0_valid) 4079 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4080 4081 if (prop->fw_cpu_boot_dev_sts1_valid) 4082 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4083 4084 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4085 return 0; 4086 } 4087 4088 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4089 u32 queue_id_base) 4090 { 4091 struct hl_hw_queue *q; 4092 u32 pq_id, pq_offset; 4093 4094 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4095 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4096 pq_offset = pq_id * 4; 4097 4098 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4099 lower_32_bits(q->bus_address)); 4100 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4101 upper_32_bits(q->bus_address)); 4102 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4103 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4104 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4105 } 4106 } 4107 4108 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4109 { 4110 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4111 4112 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4113 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4114 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4115 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4116 4117 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4118 cp_offset = cp_id * 4; 4119 4120 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4121 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4122 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4123 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4124 } 4125 4126 /* allow QMANs to accept work from ARC CQF */ 4127 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4128 } 4129 4130 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4131 u32 queue_id_base) 4132 { 4133 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4134 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4135 4136 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4137 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4138 4139 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4140 pq_offset = pq_id * 4; 4141 4142 /* Configure QMAN HBW to scratchpad as it is not needed */ 4143 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4144 lower_32_bits(gaudi2->scratchpad_bus_address)); 4145 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4146 upper_32_bits(gaudi2->scratchpad_bus_address)); 4147 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4148 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4149 4150 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4151 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4152 
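		/* The LBW completion address programmed next points at the sync manager
		 * SOB block (so_base_lo/hi), so PQC completions are effectively steered
		 * to the sync manager rather than to host memory; the HBW path above was
		 * already redirected to the scratchpad buffer as it is not needed.
		 */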
WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4153 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4154 } 4155 4156 /* Enable QMAN H/W completion */ 4157 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 4158 } 4159 4160 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4161 { 4162 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4163 u32 sp_reg_addr; 4164 4165 switch (queue_id_base) { 4166 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4167 fallthrough; 4168 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4169 fallthrough; 4170 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4171 fallthrough; 4172 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4173 fallthrough; 4174 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4175 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4176 break; 4177 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4178 fallthrough; 4179 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4180 fallthrough; 4181 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4182 fallthrough; 4183 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4184 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4185 break; 4186 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4187 fallthrough; 4188 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4189 fallthrough; 4190 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4191 fallthrough; 4192 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4193 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4194 break; 4195 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4196 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4197 break; 4198 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4199 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4200 break; 4201 default: 4202 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4203 return 0; 4204 } 4205 4206 return sp_reg_addr; 4207 } 4208 4209 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4210 u32 queue_id_base) 4211 { 4212 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4213 int map_table_entry; 4214 4215 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4216 4217 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4218 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4219 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4220 4221 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4222 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4223 gaudi2_irq_map_table[map_table_entry].cpu_id); 4224 4225 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4226 4227 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4228 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4229 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4230 4231 /* Enable the QMAN channel. 4232 * PDMA QMAN configuration is different, as we do not allow user to 4233 * access some of the CPs. 4234 * PDMA0: CP2/3 are reserved for the ARC usage. 4235 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
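	 *
	 * That distinction is applied right below by comparing reg_base against the
	 * PDMA0/PDMA1 QMAN bases and writing PDMA0_QMAN_ENABLE/PDMA1_QMAN_ENABLE
	 * instead of the generic QMAN_ENABLE value, which presumably leaves the
	 * ARC-reserved CPs disabled.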
4236 */ 4237 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4238 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4239 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4240 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4241 else 4242 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4243 } 4244 4245 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4246 u32 queue_id_base) 4247 { 4248 u32 pq_id; 4249 4250 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4251 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4252 4253 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4254 gaudi2_init_qman_cp(hdev, reg_base); 4255 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4256 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4257 } 4258 4259 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 4260 u32 dma_core_id, bool is_secure) 4261 { 4262 u32 prot, irq_handler_offset; 4263 struct cpu_dyn_regs *dyn_regs; 4264 int map_table_entry; 4265 4266 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 4267 if (is_secure) 4268 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 4269 4270 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 4271 4272 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4273 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 4274 4275 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 4276 lower_32_bits(CFG_BASE + irq_handler_offset)); 4277 4278 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 4279 upper_32_bits(CFG_BASE + irq_handler_offset)); 4280 4281 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 4282 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 4283 gaudi2_irq_map_table[map_table_entry].cpu_id); 4284 4285 /* Enable the DMA channel */ 4286 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 4287 } 4288 4289 static void gaudi2_init_kdma(struct hl_device *hdev) 4290 { 4291 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4292 u32 reg_base; 4293 4294 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 4295 return; 4296 4297 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 4298 4299 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 4300 4301 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 4302 } 4303 4304 static void gaudi2_init_pdma(struct hl_device *hdev) 4305 { 4306 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4307 u32 reg_base; 4308 4309 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 4310 return; 4311 4312 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 4313 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 4314 4315 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 4316 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 4317 4318 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 4319 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 4320 4321 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 4322 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 4323 4324 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 4325 } 4326 4327 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 4328 { 4329 u32 reg_base, base_edma_core_id, base_edma_qman_id; 4330 4331 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 4332 base_edma_qman_id = edma_stream_base[seq]; 4333 4334 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 4335 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 4336 4337 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 4338 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 4339 } 4340 4341 static void gaudi2_init_edma(struct hl_device *hdev) 4342 { 4343 struct asic_fixed_properties *prop = &hdev->asic_prop; 4344 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4345 int dcore, inst; 4346 4347 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 4348 return; 4349 4350 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 4351 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 4352 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 4353 4354 if (!(prop->edma_enabled_mask & BIT(seq))) 4355 continue; 4356 4357 gaudi2_init_edma_instance(hdev, seq); 4358 4359 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 4360 } 4361 } 4362 } 4363 4364 /* 4365 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 4366 * @hdev: pointer to habanalabs device structure. 4367 * @sob_id: sync object ID. 4368 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 4369 * @interrupt_id: interrupt ID. 4370 * 4371 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 4372 * write directly to the HBW host memory of the virtual MSI-X doorbell. 4373 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 4374 * 4375 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 4376 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 4377 * completion, by decrementing the sync object value and re-arming the monitor. 4378 */ 4379 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 4380 u32 first_mon_id, u32 interrupt_id) 4381 { 4382 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 4383 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4384 u64 addr; 4385 u8 mask; 4386 4387 /* Reset the SOB value */ 4388 sob_offset = sob_id * sizeof(u32); 4389 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 4390 4391 /* Configure 3 monitors: 4392 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 4393 * 2. Decrement SOB value by 1. 4394 * 3. Re-arm the master monitor. 
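	 *
	 * Note that the monitors are programmed in reverse order (2nd, 3rd and only
	 * then the master), and the master monitor is armed as the very last step, so
	 * it is never armed while its 3-message chain is only partially programmed.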
4395 */ 4396 4397 first_mon_offset = first_mon_id * sizeof(u32); 4398 4399 /* 2nd monitor: Decrement SOB value by 1 */ 4400 mon_offset = first_mon_offset + sizeof(u32); 4401 4402 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 4403 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4404 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4405 4406 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 4407 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 4408 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 4409 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4410 4411 /* 3rd monitor: Re-arm the master monitor */ 4412 mon_offset = first_mon_offset + 2 * sizeof(u32); 4413 4414 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 4415 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4416 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4417 4418 sob_group = sob_id / 8; 4419 mask = ~BIT(sob_id & 0x7); 4420 mode = 0; /* comparison mode is "greater than or equal to" */ 4421 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 4422 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 4423 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 4424 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 4425 4426 payload = arm; 4427 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4428 4429 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 4430 mon_offset = first_mon_offset; 4431 4432 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 4433 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 4434 4435 addr = gaudi2->virt_msix_db_dma_addr; 4436 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 4437 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 4438 4439 payload = interrupt_id; 4440 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 4441 4442 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 4443 } 4444 4445 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 4446 { 4447 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 4448 struct asic_fixed_properties *prop = &hdev->asic_prop; 4449 4450 /* Decoder normal/abnormal interrupts */ 4451 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 4452 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 4453 continue; 4454 4455 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4456 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 4457 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 4458 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4459 4460 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4461 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 4462 interrupt_id += 1; 4463 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 4464 } 4465 } 4466 4467 static void gaudi2_init_sm(struct hl_device *hdev) 4468 { 4469 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4470 u64 cq_address; 4471 u32 reg_val; 4472 int i; 4473 4474 /* Enable HBW/LBW CQ for completion monitors */ 4475 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4476 
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 4477 4478 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 4479 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4480 4481 /* Enable only HBW CQ for KDMA completion monitor */ 4482 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 4483 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 4484 4485 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 4486 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 4487 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 4488 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 4489 4490 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 4491 cq_address = 4492 hdev->completion_queue[i].bus_address; 4493 4494 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 4495 lower_32_bits(cq_address)); 4496 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 4497 upper_32_bits(cq_address)); 4498 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 4499 ilog2(HL_CQ_SIZE_IN_BYTES)); 4500 } 4501 4502 /* Configure kernel ASID and MMU BP*/ 4503 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 4504 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 4505 4506 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 4507 gaudi2_prepare_sm_for_virt_msix_db(hdev); 4508 } 4509 4510 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 4511 { 4512 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4513 u32 reg_val; 4514 int i; 4515 4516 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 4517 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 4518 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 4519 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 4520 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 4521 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 4522 4523 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 4524 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 4525 4526 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 4527 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 4528 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 4529 } 4530 } 4531 4532 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 4533 bool config_qman_only) 4534 { 4535 u32 queue_id_base, reg_base; 4536 4537 switch (dcore_id) { 4538 case 0: 4539 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 4540 break; 4541 case 1: 4542 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 4543 break; 4544 case 2: 4545 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 4546 break; 4547 case 3: 4548 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 4549 break; 4550 default: 4551 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 4552 return; 4553 } 4554 4555 if (!config_qman_only) { 4556 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 4557 gaudi2_init_mme_acc(hdev, reg_base); 4558 } 4559 4560 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 4561 gaudi2_init_qman(hdev, reg_base, queue_id_base); 4562 } 4563 4564 static void gaudi2_init_mme(struct hl_device *hdev) 4565 { 4566 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4567 int i; 4568 4569 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 4570 return; 4571 4572 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 4573 gaudi2_init_dcore_mme(hdev, i, false); 4574 4575 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 4576 } 4577 } 4578 4579 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 4580 { 4581 /* Mask arithmetic and QM interrupts in TPC */ 4582 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 4583 4584 /* Set 16 cache lines */ 4585 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 4586 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 4587 } 4588 4589 struct gaudi2_tpc_init_cfg_data { 4590 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 4591 }; 4592 4593 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 4594 u32 offset, struct iterate_module_ctx *ctx) 4595 { 4596 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4597 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 4598 u32 queue_id_base; 4599 u8 seq; 4600 4601 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 4602 4603 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 4604 /* gets last sequence number */ 4605 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 4606 else 4607 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 4608 4609 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 4610 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 4611 4612 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 4613 } 4614 4615 static void gaudi2_init_tpc(struct hl_device *hdev) 4616 { 4617 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4618 struct gaudi2_tpc_init_cfg_data init_cfg_data; 4619 struct iterate_module_ctx tpc_iter; 4620 4621 if (!hdev->asic_prop.tpc_enabled_mask) 4622 return; 4623 4624 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 4625 return; 4626 4627 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 4628 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 4629 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 4630 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 4631 tpc_iter.fn = &gaudi2_init_tpc_config; 4632 tpc_iter.data = &init_cfg_data; 4633 gaudi2_iterate_tpcs(hdev, &tpc_iter); 4634 } 4635 4636 static void gaudi2_init_rotator(struct hl_device *hdev) 4637 { 4638 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4639 u32 i, reg_base, queue_id; 4640 4641 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 4642 4643 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4644 reg_base = gaudi2_qm_blocks_bases[queue_id]; 4645 gaudi2_init_qman(hdev, reg_base, queue_id); 4646 4647 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 4648 } 4649 } 4650 4651 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 4652 { 4653 u32 sob_id; 4654 4655 /* VCMD normal interrupt */ 4656 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 4657 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 4658 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4659 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4660 4661 /* VCMD abnormal interrupt */ 4662 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 4663 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 4664 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 4665 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 4666 } 4667 4668 static void gaudi2_init_dec(struct hl_device *hdev) 4669 
{ 4670 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4671 u32 dcore_id, dec_id, dec_bit; 4672 u64 base_addr; 4673 4674 if (!hdev->asic_prop.decoder_enabled_mask) 4675 return; 4676 4677 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 4678 return; 4679 4680 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4681 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4682 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4683 4684 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4685 continue; 4686 4687 base_addr = mmDCORE0_DEC0_CMD_BASE + 4688 BRDG_CTRL_BLOCK_OFFSET + 4689 dcore_id * DCORE_OFFSET + 4690 dec_id * DCORE_VDEC_OFFSET; 4691 4692 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4693 4694 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4695 } 4696 4697 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 4698 dec_bit = PCIE_DEC_SHIFT + dec_id; 4699 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4700 continue; 4701 4702 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 4703 dec_id * DCORE_VDEC_OFFSET; 4704 4705 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 4706 4707 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 4708 } 4709 } 4710 4711 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 4712 u32 stlb_base, u32 asid, u64 phys_addr) 4713 { 4714 u32 status, timeout_usec; 4715 int rc; 4716 4717 if (hdev->pldm || !hdev->pdev) 4718 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 4719 else 4720 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 4721 4722 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 4723 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 4724 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 4725 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 4726 4727 rc = hl_poll_timeout( 4728 hdev, 4729 stlb_base + STLB_BUSY_OFFSET, 4730 status, 4731 !(status & 0x80000000), 4732 1000, 4733 timeout_usec); 4734 4735 if (rc) { 4736 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 4737 return rc; 4738 } 4739 4740 return 0; 4741 } 4742 4743 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 4744 u32 start_offset, u32 inv_start_val, 4745 u32 flags) 4746 { 4747 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 4748 if (flags & MMU_OP_CLEAR_MEMCACHE) 4749 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 4750 4751 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 4752 return; 4753 4754 WREG32(stlb_base + start_offset, inv_start_val); 4755 } 4756 4757 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 4758 struct gaudi2_cache_invld_params *inv_params) 4759 { 4760 u32 status, timeout_usec, start_offset; 4761 int rc; 4762 4763 timeout_usec = (hdev->pldm) ? 
GAUDI2_PLDM_MMU_TIMEOUT_USEC : 4764 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 4765 4766 /* poll PMMU mem line cache (only needed in mmu range invalidation) */ 4767 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) { 4768 rc = hl_poll_timeout( 4769 hdev, 4770 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 4771 status, 4772 status & 0x1, 4773 1000, 4774 timeout_usec); 4775 4776 if (rc) 4777 return rc; 4778 4779 /* Need to manually reset the status to 0 */ 4780 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0); 4781 } 4782 4783 /* Lower cache does not work with cache lines, hence we can skip its 4784 * invalidation upon map and invalidate only upon unmap 4785 */ 4786 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV) 4787 return 0; 4788 4789 start_offset = inv_params->range_invalidation ? 4790 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET; 4791 4792 rc = hl_poll_timeout( 4793 hdev, 4794 stlb_base + start_offset, 4795 status, 4796 !(status & 0x1), 4797 1000, 4798 timeout_usec); 4799 4800 return rc; 4801 } 4802 4803 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id) 4804 { 4805 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4806 u32 hw_cap; 4807 4808 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id); 4809 4810 if (gaudi2->hw_cap_initialized & hw_cap) 4811 return true; 4812 4813 return false; 4814 } 4815 4816 /* this function shall be called only for HMMUs for which capability bit is set */ 4817 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id) 4818 { 4819 u32 offset; 4820 4821 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 4822 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset); 4823 } 4824 4825 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base, 4826 struct gaudi2_cache_invld_params *inv_params) 4827 { 4828 u32 start_offset; 4829 4830 if (inv_params->range_invalidation) { 4831 /* Set the addresses range 4832 * Note: that the start address we set in register, is not included in 4833 * the range of the invalidation, by design. 4834 * that's why we need to set lower address than the one we actually 4835 * want to be included in the range invalidation. 
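		 *
		 * Illustrative example: to invalidate a range starting at VA 0x1000,
		 * the code below programs start = 0x1000 - 1 = 0xFFF, and both start
		 * and end_va are written shifted by MMU_RANGE_INV_VA_LSB_SHIFT /
		 * MMU_RANGE_INV_VA_MSB_SHIFT into the RANGE_INV START/END registers.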
4836 */ 4837 u64 start = inv_params->start_va - 1; 4838 4839 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 4840 4841 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 4842 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 4843 4844 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 4845 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 4846 4847 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 4848 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 4849 4850 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 4851 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 4852 } else { 4853 start_offset = STLB_INV_ALL_START_OFFSET; 4854 } 4855 4856 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 4857 inv_params->inv_start_val, inv_params->flags); 4858 } 4859 4860 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 4861 int dcore_id, int hmmu_id, 4862 struct gaudi2_cache_invld_params *inv_params) 4863 { 4864 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4865 4866 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 4867 } 4868 4869 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 4870 int dcore_id, int hmmu_id, 4871 struct gaudi2_cache_invld_params *inv_params) 4872 { 4873 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 4874 4875 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 4876 } 4877 4878 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 4879 struct gaudi2_cache_invld_params *inv_params) 4880 { 4881 int dcore_id, hmmu_id; 4882 4883 /* first send all invalidation commands */ 4884 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4885 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4886 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4887 continue; 4888 4889 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 4890 } 4891 } 4892 4893 /* next, poll all invalidations status */ 4894 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 4895 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 4896 int rc; 4897 4898 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 4899 continue; 4900 4901 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 4902 inv_params); 4903 if (rc) 4904 return rc; 4905 } 4906 } 4907 4908 return 0; 4909 } 4910 4911 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 4912 { 4913 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4914 struct gaudi2_cache_invld_params invld_params; 4915 int rc = 0; 4916 4917 if (hdev->reset_info.hard_reset_pending) 4918 return rc; 4919 4920 invld_params.range_invalidation = false; 4921 invld_params.inv_start_val = 1; 4922 4923 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4924 invld_params.flags = flags; 4925 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4926 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4927 &invld_params); 4928 } else if (flags & MMU_OP_PHYS_PACK) { 4929 invld_params.flags = 0; 4930 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4931 } 4932 4933 return rc; 4934 } 4935 4936 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 4937 u32 flags, u32 asid, u64 va, u64 size) 4938 { 4939 struct gaudi2_cache_invld_params invld_params = {0}; 4940 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4941 u64 start_va, end_va; 4942 u32 
inv_start_val; 4943 int rc = 0; 4944 4945 if (hdev->reset_info.hard_reset_pending) 4946 return 0; 4947 4948 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 4949 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 4950 asid << MMU_RANGE_INV_ASID_SHIFT); 4951 start_va = va; 4952 end_va = start_va + size; 4953 4954 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 4955 /* As range invalidation does not support zero address we will 4956 * do full invalidation in this case 4957 */ 4958 if (start_va) { 4959 invld_params.range_invalidation = true; 4960 invld_params.start_va = start_va; 4961 invld_params.end_va = end_va; 4962 invld_params.inv_start_val = inv_start_val; 4963 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 4964 } else { 4965 invld_params.range_invalidation = false; 4966 invld_params.inv_start_val = 1; 4967 invld_params.flags = flags; 4968 } 4969 4970 4971 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 4972 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 4973 &invld_params); 4974 if (rc) 4975 return rc; 4976 4977 } else if (flags & MMU_OP_PHYS_PACK) { 4978 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 4979 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 4980 invld_params.inv_start_val = inv_start_val; 4981 invld_params.flags = flags; 4982 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 4983 } 4984 4985 return rc; 4986 } 4987 4988 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 4989 { 4990 struct asic_fixed_properties *prop = &hdev->asic_prop; 4991 u64 hop0_addr; 4992 u32 asid, max_asid = prop->max_asid; 4993 int rc; 4994 4995 /* it takes too much time to init all of the ASIDs on palladium */ 4996 if (hdev->pldm) 4997 max_asid = min((u32) 8, max_asid); 4998 4999 for (asid = 0 ; asid < max_asid ; asid++) { 5000 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 5001 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 5002 if (rc) { 5003 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 5004 return rc; 5005 } 5006 } 5007 5008 return 0; 5009 } 5010 5011 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5012 { 5013 u32 status, timeout_usec; 5014 int rc; 5015 5016 if (hdev->pldm || !hdev->pdev) 5017 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5018 else 5019 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5020 5021 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5022 5023 rc = hl_poll_timeout( 5024 hdev, 5025 stlb_base + STLB_SRAM_INIT_OFFSET, 5026 status, 5027 !status, 5028 1000, 5029 timeout_usec); 5030 5031 if (rc) 5032 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5033 5034 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5035 if (rc) 5036 return rc; 5037 5038 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5039 5040 rc = hl_poll_timeout( 5041 hdev, 5042 stlb_base + STLB_INV_ALL_START_OFFSET, 5043 status, 5044 !status, 5045 1000, 5046 timeout_usec); 5047 5048 if (rc) 5049 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5050 5051 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5052 5053 return rc; 5054 } 5055 5056 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5057 { 5058 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5059 u32 mmu_base, stlb_base; 5060 int rc; 5061 5062 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5063 return 0; 5064 5065 mmu_base = mmPMMU_HBW_MMU_BASE; 5066 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5067 5068 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5069 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5070 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5071 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5072 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5073 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5074 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5075 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5076 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5077 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5078 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5079 5080 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5081 5082 if (PAGE_SIZE == SZ_64K) { 5083 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5084 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5085 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5086 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5087 FIELD_PREP( 5088 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5089 1), 5090 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5091 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5092 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5093 } 5094 5095 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5096 5097 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5098 if (rc) 5099 return rc; 5100 5101 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5102 5103 return 0; 5104 } 5105 5106 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5107 int hmmu_id) 5108 { 5109 struct asic_fixed_properties *prop = &hdev->asic_prop; 5110 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5111 u32 offset, mmu_base, stlb_base, hw_cap; 5112 u8 dmmu_seq; 5113 int rc; 5114 5115 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5116 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5117 5118 /* 5119 * return if DMMU is already initialized or if it's not out of 5120 * isolation (due to cluster binning) 5121 */ 5122 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5123 return 0; 5124 5125 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5126 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5127 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5128 5129 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5130 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5131 5132 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5133 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5134 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5135 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5136 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5137 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5138 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5139 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5140 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5141 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5142 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5143 5144 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5145 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5146 5147 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5148 5149 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5150 if (rc) 5151 return rc; 5152 5153 gaudi2->hw_cap_initialized |= hw_cap; 5154 5155 return 0; 5156 } 5157 5158 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5159 { 5160 int rc, dcore_id, hmmu_id; 5161 5162 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5163 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5164 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5165 if (rc) 5166 return rc; 5167 } 5168 5169 return 0; 5170 } 5171 5172 static int gaudi2_mmu_init(struct hl_device *hdev) 5173 { 5174 int rc; 5175 5176 rc = gaudi2_pci_mmu_init(hdev); 5177 if (rc) 5178 return rc; 5179 5180 rc = gaudi2_hbm_mmu_init(hdev); 5181 if (rc) 5182 return rc; 5183 5184 return 0; 5185 } 5186 5187 static int gaudi2_hw_init(struct hl_device *hdev) 5188 { 5189 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5190 int rc; 5191 5192 /* Let's mark in the H/W that we have reached this point. We check 5193 * this value in the reset_before_init function to understand whether 5194 * we need to reset the chip before doing H/W init. This register is 5195 * cleared by the H/W upon H/W reset 5196 */ 5197 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5198 5199 /* Perform read from the device to make sure device is up */ 5200 RREG32(mmHW_STATE); 5201 5202 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5203 * So we set it here and if anyone tries to move it later to 5204 * a different address, there will be an error 5205 */ 5206 if (hdev->asic_prop.iatu_done_by_fw) 5207 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5208 5209 /* 5210 * Before pushing u-boot/linux to device, need to set the hbm bar to 5211 * base address of dram 5212 */ 5213 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5214 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5215 return -EIO; 5216 } 5217 5218 rc = gaudi2_init_cpu(hdev); 5219 if (rc) { 5220 dev_err(hdev->dev, "failed to initialize CPU\n"); 5221 return rc; 5222 } 5223 5224 gaudi2_init_scrambler_hbm(hdev); 5225 gaudi2_init_kdma(hdev); 5226 5227 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5228 if (rc) { 5229 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5230 return rc; 5231 } 5232 5233 rc = gaudi2->cpucp_info_get(hdev); 5234 if (rc) { 5235 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5236 return rc; 5237 } 5238 5239 rc = gaudi2_mmu_init(hdev); 5240 if (rc) 5241 return rc; 5242 5243 gaudi2_init_pdma(hdev); 5244 gaudi2_init_edma(hdev); 5245 gaudi2_init_sm(hdev); 5246 gaudi2_init_tpc(hdev); 5247 gaudi2_init_mme(hdev); 5248 gaudi2_init_rotator(hdev); 5249 gaudi2_init_dec(hdev); 5250 gaudi2_enable_timestamp(hdev); 5251 5252 rc = gaudi2_coresight_init(hdev); 5253 if (rc) 5254 goto disable_queues; 5255 5256 rc = gaudi2_enable_msix(hdev); 5257 if (rc) 5258 goto disable_queues; 5259 5260 /* Perform read from the device to flush all configuration */ 5261 RREG32(mmHW_STATE); 5262 5263 return 0; 5264 5265 disable_queues: 5266 gaudi2_disable_dma_qmans(hdev); 5267 gaudi2_disable_mme_qmans(hdev); 5268 gaudi2_disable_tpc_qmans(hdev); 5269 gaudi2_disable_rot_qmans(hdev); 5270 gaudi2_disable_nic_qmans(hdev); 5271 5272 gaudi2_disable_timestamp(hdev); 5273 5274 return rc; 5275 } 5276 5277 /** 5278 * gaudi2_send_hard_reset_cmd - common function to handle reset 5279 * 5280 * 
@hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by the driver or by the FW, and which
 * FW components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case where the failure happened while loading the cpu
	 * management app: the driver didn't detect any failure while loading the FW,
	 * so in such a scenario it will send only HALT_MACHINE, and no one will respond
	 * to this request because the FW is already back in preboot and cannot handle
	 * such a command.
	 * In this case, the next time the management app loads it will check the events
	 * register, which will still hold the halt indication, and will reboot the device.
	 * The solution is to let preboot clear all relevant registers before the next
	 * boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/bootfit is loaded, this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start the HALT sequence (the preparations for the
	 *                  reset but not the reset itself, as it is not implemented
	 *                  on their part) and LKD will wait to let FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot only (without Linux/bootfit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * When working with Linux/bootfit, this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return back to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, this won't
	 * do any damage.
5342 */ 5343 5344 if (heartbeat_reset || preboot_only || !cpu_initialized) { 5345 if (hdev->asic_prop.hard_reset_done_by_fw) 5346 hl_fw_ask_hard_reset_without_linux(hdev); 5347 else 5348 hl_fw_ask_halt_machine_without_linux(hdev); 5349 } 5350 } 5351 5352 /** 5353 * gaudi2_execute_hard_reset - execute hard reset by driver/FW 5354 * 5355 * @hdev: pointer to the habanalabs device structure 5356 * @reset_sleep_ms: sleep time in msec after reset 5357 * 5358 * This function executes hard reset based on if driver/FW should do the reset 5359 */ 5360 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms) 5361 { 5362 if (hdev->asic_prop.hard_reset_done_by_fw) { 5363 gaudi2_send_hard_reset_cmd(hdev); 5364 return; 5365 } 5366 5367 /* Set device to handle FLR by H/W as we will put the device 5368 * CPU to halt mode 5369 */ 5370 WREG32(mmPCIE_AUX_FLR_CTRL, 5371 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 5372 5373 gaudi2_send_hard_reset_cmd(hdev); 5374 5375 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); 5376 } 5377 5378 /** 5379 * gaudi2_execute_soft_reset - execute soft reset by driver/FW 5380 * 5381 * @hdev: pointer to the habanalabs device structure 5382 * @reset_sleep_ms: sleep time in msec after reset 5383 * @driver_performs_reset: true if driver should perform reset instead of f/w. 5384 * 5385 * This function executes soft reset based on if driver/FW should do the reset 5386 */ 5387 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms, 5388 bool driver_performs_reset) 5389 { 5390 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5391 5392 if (!driver_performs_reset) { 5393 /* set SP to indicate reset request sent to FW */ 5394 if (dyn_regs->cpu_rst_status) 5395 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 5396 else 5397 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 5398 5399 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 5400 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); 5401 return; 5402 } 5403 5404 /* Block access to engines, QMANs and SM during reset, these 5405 * RRs will be reconfigured after soft reset. 5406 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset. 
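	 *
	 * In effect, the two long-RR writes below cover the LBW span from
	 * mmDCORE0_TPC0_QM_DCCM_BASE up to mmPCIE_MSIX_BASE, and then from the end
	 * of the PCIE_MSIX block up to the end of the PCIE_VDEC1 MSTR_IF shared-HBW
	 * RR block, leaving only the MSI-X block itself accessible.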
 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
}

static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
								u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	/* Without this sleep the reset will not work */
	msleep(reset_sleep_ms);

	/* We poll the BTM done indication multiple times after reset due to
	 * HW erratum 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}

static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
			reg_val);
}

static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;

	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
	} else {
		/*
		 * As we also have to support working with preboot only (which does not
		 * support soft reset), we have to make sure that security is disabled
		 * before letting the driver do the reset. The user shall control the BFE
		 * flags to avoid requesting a soft reset on a secured device that runs
		 * preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
							!hdev->asic_prop.fw_security_enabled);
		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
	}

skip_reset:
	if (driver_performs_reset || hard_reset)
		/*
		 * Instead of waiting for the BTM indication we should wait for preboot ready:
		 * Consider the below scenario:
		 * 1. FW update is being triggered
		 *        - setting the dirty bit
		 * 2. hard reset will be triggered due to the dirty bit
		 * 3. FW initiates the reset:
		 *        - dirty bit cleared
		 *        - BTM indication cleared
		 *        - preboot ready indication cleared
		 * 4. during hard reset:
		 *        - BTM indication will be set
		 *        - BIST test performed and another reset triggered
		 * 5. 
only after this reset the preboot will set the preboot ready 5513 * 5514 * when polling on BTM indication alone we can lose sync with FW while trying to 5515 * communicate with FW that is during reset. 5516 * to overcome this we will always wait to preboot ready indication 5517 */ 5518 if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { 5519 msleep(reset_sleep_ms); 5520 hl_fw_wait_preboot_ready(hdev); 5521 } else { 5522 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); 5523 } 5524 else 5525 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 5526 5527 if (!gaudi2) 5528 return; 5529 5530 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 5531 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 5532 5533 /* 5534 * Clear NIC capability mask in order for driver to re-configure 5535 * NIC QMANs. NIC ports will not be re-configured during soft 5536 * reset as we call gaudi2_nic_init only during hard reset 5537 */ 5538 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 5539 5540 if (hard_reset) { 5541 gaudi2->hw_cap_initialized &= 5542 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 5543 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 5544 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 5545 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 5546 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 5547 5548 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 5549 } else { 5550 gaudi2->hw_cap_initialized &= 5551 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 5552 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 5553 HW_CAP_ROT_MASK); 5554 } 5555 } 5556 5557 static int gaudi2_suspend(struct hl_device *hdev) 5558 { 5559 int rc; 5560 5561 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 5562 if (rc) 5563 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 5564 5565 return rc; 5566 } 5567 5568 static int gaudi2_resume(struct hl_device *hdev) 5569 { 5570 return gaudi2_init_iatu(hdev); 5571 } 5572 5573 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 5574 void *cpu_addr, dma_addr_t dma_addr, size_t size) 5575 { 5576 int rc; 5577 5578 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 5579 VM_DONTCOPY | VM_NORESERVE; 5580 5581 #ifdef _HAS_DMA_MMAP_COHERENT 5582 5583 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 5584 if (rc) 5585 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 5586 5587 #else 5588 5589 rc = remap_pfn_range(vma, vma->vm_start, 5590 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 5591 size, vma->vm_page_prot); 5592 if (rc) 5593 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 5594 5595 #endif 5596 5597 return rc; 5598 } 5599 5600 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 5601 { 5602 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5603 u64 hw_cap_mask = 0; 5604 u64 hw_tpc_cap_bit = 0; 5605 u64 hw_nic_cap_bit = 0; 5606 u64 hw_test_cap_bit = 0; 5607 5608 switch (hw_queue_id) { 5609 case GAUDI2_QUEUE_ID_PDMA_0_0: 5610 case GAUDI2_QUEUE_ID_PDMA_0_1: 5611 case GAUDI2_QUEUE_ID_PDMA_1_0: 5612 hw_cap_mask = HW_CAP_PDMA_MASK; 5613 break; 5614 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 5615 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 5616 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 5617 break; 5618 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 5619 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 5620 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 
2); 5621 break; 5622 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 5623 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 5624 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2); 5625 break; 5626 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 5627 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 5628 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 5629 break; 5630 5631 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 5632 hw_test_cap_bit = HW_CAP_MME_SHIFT; 5633 break; 5634 5635 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 5636 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 5637 break; 5638 5639 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 5640 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 5641 break; 5642 5643 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 5644 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 5645 break; 5646 5647 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 5648 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 5649 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 5650 5651 /* special case where cap bit refers to the first queue id */ 5652 if (!hw_tpc_cap_bit) 5653 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 5654 break; 5655 5656 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 5657 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 5658 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 5659 break; 5660 5661 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 5662 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 5663 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 5664 break; 5665 5666 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 5667 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 5668 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 5669 break; 5670 5671 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 5672 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 5673 break; 5674 5675 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 5676 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 5677 break; 5678 5679 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 5680 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 5681 5682 /* special case where cap bit refers to the first queue id */ 5683 if (!hw_nic_cap_bit) 5684 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 5685 break; 5686 5687 case GAUDI2_QUEUE_ID_CPU_PQ: 5688 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 5689 5690 default: 5691 return false; 5692 } 5693 5694 if (hw_tpc_cap_bit) 5695 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 5696 5697 if (hw_nic_cap_bit) 5698 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 5699 5700 if (hw_test_cap_bit) 5701 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 5702 5703 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 5704 } 5705 5706 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 5707 { 5708 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5709 5710 switch (arc_id) { 5711 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 5712 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5713 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 5714 5715 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5716 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5717 5718 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5719 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5720 5721 default: 5722 return false; 5723 } 5724 } 5725 5726 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5727 { 5728 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5729 5730 switch (arc_id) { 5731 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5732 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5733 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 5734 break; 5735 5736 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5737 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 5738 break; 5739 5740 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5741 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 5742 break; 5743 5744 default: 5745 return; 5746 } 5747 } 5748 5749 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 5750 { 5751 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5752 5753 switch (arc_id) { 5754 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 5755 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 5756 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 5757 break; 5758 5759 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 5760 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 5761 break; 5762 5763 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 5764 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 5765 break; 5766 5767 default: 5768 return; 5769 } 5770 } 5771 5772 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 5773 { 5774 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5775 u32 pq_offset, reg_base, db_reg_offset, db_value; 5776 5777 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 5778 /* 5779 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 5780 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 5781 * number. 
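		 *
		 * Worked example (illustrative): for a queue ID whose two low bits are
		 * 0x2, the internal PQ number is 2, so pq_offset = 2 * 4 = 8 and the
		 * doorbell write below lands on PQ_PI_2 of that QMAN
		 * (QM_PQ_PI_0_OFFSET + 8).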
5782 */ 5783 pq_offset = (hw_queue_id & 0x3) * 4; 5784 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 5785 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 5786 } else { 5787 db_reg_offset = mmCPU_IF_PF_PQ_PI; 5788 } 5789 5790 db_value = pi; 5791 5792 /* ring the doorbell */ 5793 WREG32(db_reg_offset, db_value); 5794 5795 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 5796 /* make sure device CPU will read latest data from host */ 5797 mb(); 5798 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 5799 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 5800 } 5801 } 5802 5803 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 5804 { 5805 __le64 *pbd = (__le64 *) bd; 5806 5807 /* The QMANs are on the host memory so a simple copy suffice */ 5808 pqe[0] = pbd[0]; 5809 pqe[1] = pbd[1]; 5810 } 5811 5812 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 5813 dma_addr_t *dma_handle, gfp_t flags) 5814 { 5815 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 5816 } 5817 5818 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 5819 void *cpu_addr, dma_addr_t dma_handle) 5820 { 5821 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 5822 } 5823 5824 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 5825 u32 timeout, u64 *result) 5826 { 5827 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5828 5829 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 5830 if (result) 5831 *result = 0; 5832 return 0; 5833 } 5834 5835 if (!timeout) 5836 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 5837 5838 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 5839 } 5840 5841 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 5842 gfp_t mem_flags, dma_addr_t *dma_handle) 5843 { 5844 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 5845 return NULL; 5846 5847 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 5848 } 5849 5850 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 5851 { 5852 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 5853 } 5854 5855 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 5856 dma_addr_t *dma_handle) 5857 { 5858 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 5859 } 5860 5861 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 5862 { 5863 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 5864 } 5865 5866 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 5867 enum dma_data_direction dir) 5868 { 5869 dma_addr_t dma_addr; 5870 5871 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 5872 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 5873 return 0; 5874 5875 return dma_addr; 5876 } 5877 5878 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 5879 enum dma_data_direction dir) 5880 { 5881 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 5882 } 5883 5884 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 5885 { 5886 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5887 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5888 5889 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 5890 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5891 return -EINVAL; 5892 } 5893 5894 
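	/*
	 * In short, the checks below accept the CB only if it falls entirely inside
	 * one of the ranges the internal engines can reach: SRAM, DRAM, the DMMU VA
	 * range (when the DMMUs are initialized), the PMMU or PMMU-huge VA ranges
	 * (when the PMMU is initialized), or, when the PMMU is not initialized, a
	 * valid host physical address on a device with no IOMMU mapping.
	 */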
/* Just check if CB address is valid */ 5895 5896 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5897 parser->user_cb_size, 5898 asic_prop->sram_user_base_address, 5899 asic_prop->sram_end_address)) 5900 return 0; 5901 5902 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5903 parser->user_cb_size, 5904 asic_prop->dram_user_base_address, 5905 asic_prop->dram_end_address)) 5906 return 0; 5907 5908 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 5909 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5910 parser->user_cb_size, 5911 asic_prop->dmmu.start_addr, 5912 asic_prop->dmmu.end_addr)) 5913 return 0; 5914 5915 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 5916 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5917 parser->user_cb_size, 5918 asic_prop->pmmu.start_addr, 5919 asic_prop->pmmu.end_addr) || 5920 hl_mem_area_inside_range( 5921 (u64) (uintptr_t) parser->user_cb, 5922 parser->user_cb_size, 5923 asic_prop->pmmu_huge.start_addr, 5924 asic_prop->pmmu_huge.end_addr)) 5925 return 0; 5926 5927 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 5928 if (!hdev->pdev) 5929 return 0; 5930 5931 if (!device_iommu_mapped(&hdev->pdev->dev)) 5932 return 0; 5933 } 5934 5935 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 5936 parser->user_cb, parser->user_cb_size); 5937 5938 return -EFAULT; 5939 } 5940 5941 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5942 { 5943 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5944 5945 if (!parser->is_kernel_allocated_cb) 5946 return gaudi2_validate_cb_address(hdev, parser); 5947 5948 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5949 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 5950 return -EINVAL; 5951 } 5952 5953 return 0; 5954 } 5955 5956 static int gaudi2_send_heartbeat(struct hl_device *hdev) 5957 { 5958 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5959 5960 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 5961 return 0; 5962 5963 return hl_fw_send_heartbeat(hdev); 5964 } 5965 5966 /* This is an internal helper function, used to update the KDMA mmu props. 5967 * Should be called with a proper kdma lock. 
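 * It programs the same ASID and MMU-bypass setting into both the RD and WR
 * fields of the KDMA AXUSER_HB registers, via the paired shifts below.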
5968 */ 5969 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 5970 bool mmu_bypass, u32 asid) 5971 { 5972 u32 rw_asid, rw_mmu_bp; 5973 5974 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 5975 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 5976 5977 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 5978 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 5979 5980 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 5981 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 5982 } 5983 5984 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 5985 u32 mon_payload, u32 sync_value) 5986 { 5987 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 5988 u8 mask; 5989 5990 sob_offset = sob_id * 4; 5991 mon_offset = mon_id * 4; 5992 5993 /* Reset the SOB value */ 5994 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 5995 5996 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 5997 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 5998 5999 /* Configure this address with CS index because CQ_EN is set */ 6000 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 6001 6002 sync_group_id = sob_id / 8; 6003 mask = ~(1 << (sob_id & 0x7)); 6004 mode = 1; /* comparison mode is "equal to" */ 6005 6006 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 6007 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 6008 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 6009 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 6010 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 6011 } 6012 6013 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6014 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6015 u64 src_addr, u64 dst_addr, 6016 u32 size, bool is_memset) 6017 { 6018 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6019 struct hl_cq_entry *cq_base; 6020 struct hl_cq *cq; 6021 u64 comp_addr; 6022 int rc; 6023 6024 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6025 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6026 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6027 6028 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6029 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6030 6031 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6032 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6033 6034 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6035 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6036 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6037 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6038 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6039 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6040 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6041 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6042 6043 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6044 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6045 6046 if (is_memset) 6047 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6048 6049 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6050 6051 /* Wait for completion */ 6052 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6053 cq_base = 
cq->kernel_address; 6054 polling_addr = (u32 *)&cq_base[cq->ci]; 6055 6056 if (hdev->pldm) 6057 /* for each 1MB 20 second of timeout */ 6058 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6059 else 6060 timeout = KDMA_TIMEOUT_USEC; 6061 6062 /* Polling */ 6063 rc = hl_poll_timeout_memory( 6064 hdev, 6065 polling_addr, 6066 status, 6067 (status == 1), 6068 1000, 6069 timeout, 6070 true); 6071 6072 *polling_addr = 0; 6073 6074 if (rc) { 6075 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6076 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6077 return rc; 6078 } 6079 6080 cq->ci = hl_cq_inc_ptr(cq->ci); 6081 6082 return 0; 6083 } 6084 6085 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6086 { 6087 u32 i; 6088 6089 for (i = 0 ; i < size ; i += sizeof(u32)) 6090 WREG32(addr + i, val); 6091 } 6092 6093 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6094 { 6095 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6096 6097 if (enable) { 6098 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6099 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6100 } else { 6101 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6102 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6103 } 6104 } 6105 6106 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) 6107 { 6108 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 6109 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6110 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; 6111 struct packet_msg_short *msg_short_pkt; 6112 dma_addr_t pkt_dma_addr; 6113 size_t pkt_size; 6114 int rc; 6115 6116 if (hdev->pldm) 6117 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6118 else 6119 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6120 6121 pkt_size = sizeof(*msg_short_pkt); 6122 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); 6123 if (!msg_short_pkt) { 6124 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", 6125 hw_queue_id); 6126 return -ENOMEM; 6127 } 6128 6129 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6130 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6131 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6132 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6133 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6134 6135 msg_short_pkt->value = cpu_to_le32(sob_val); 6136 msg_short_pkt->ctl = cpu_to_le32(tmp); 6137 6138 /* Reset the SOB value */ 6139 WREG32(sob_addr, 0); 6140 6141 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 6142 if (rc) { 6143 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", 6144 hw_queue_id); 6145 goto free_pkt; 6146 } 6147 6148 rc = hl_poll_timeout( 6149 hdev, 6150 sob_addr, 6151 tmp, 6152 (tmp == sob_val), 6153 1000, 6154 timeout_usec); 6155 6156 if (rc == -ETIMEDOUT) { 6157 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 6158 hw_queue_id, tmp); 6159 rc = -EIO; 6160 } 6161 6162 /* Reset the SOB value */ 6163 WREG32(sob_addr, 0); 6164 6165 free_pkt: 6166 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); 6167 return rc; 6168 } 6169 6170 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6171 { 6172 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6173 6174 /* 6175 * check capability here as send_cpu_message() won't update the result 6176 * value if no capability 6177 */ 6178 if 
(!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6179 return 0; 6180 6181 return hl_fw_test_cpu_queue(hdev); 6182 } 6183 6184 static int gaudi2_test_queues(struct hl_device *hdev) 6185 { 6186 int i, rc, ret_val = 0; 6187 6188 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6189 if (!gaudi2_is_queue_enabled(hdev, i)) 6190 continue; 6191 6192 gaudi2_qman_set_test_mode(hdev, i, true); 6193 rc = gaudi2_test_queue(hdev, i); 6194 gaudi2_qman_set_test_mode(hdev, i, false); 6195 6196 if (rc) { 6197 ret_val = -EINVAL; 6198 goto done; 6199 } 6200 } 6201 6202 rc = gaudi2_test_cpu_queue(hdev); 6203 if (rc) { 6204 ret_val = -EINVAL; 6205 goto done; 6206 } 6207 6208 done: 6209 return ret_val; 6210 } 6211 6212 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6213 { 6214 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6215 size_t irq_arr_size; 6216 6217 /* TODO: missing gaudi2_nic_resume. 6218 * Until implemented nic_hw_cap_initialized will remain zeroed 6219 */ 6220 gaudi2_init_arcs(hdev); 6221 gaudi2_scrub_arcs_dccm(hdev); 6222 gaudi2_init_security(hdev); 6223 6224 /* Unmask all IRQs since some could have been received during the soft reset */ 6225 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 6226 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 6227 } 6228 6229 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 6230 struct iterate_module_ctx *ctx) 6231 { 6232 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 6233 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 6234 bool is_eng_idle; 6235 int engine_idx; 6236 6237 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 6238 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 6239 else 6240 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 6241 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 6242 6243 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 6244 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 6245 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 6246 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 6247 6248 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6249 IS_TPC_IDLE(tpc_cfg_sts); 6250 *(idle_data->is_idle) &= is_eng_idle; 6251 6252 if (idle_data->mask && !is_eng_idle) 6253 set_bit(engine_idx, idle_data->mask); 6254 6255 if (idle_data->e) 6256 hl_engine_data_sprintf(idle_data->e, 6257 idle_data->tpc_fmt, dcore, inst, 6258 is_eng_idle ? 
"Y" : "N", 6259 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 6260 } 6261 6262 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6263 struct engines_data *e) 6264 { 6265 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, 6266 mme_arch_sts, dec_swreg15, dec_enabled_bit; 6267 struct asic_fixed_properties *prop = &hdev->asic_prop; 6268 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n"; 6269 unsigned long *mask = (unsigned long *) mask_arr; 6270 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n"; 6271 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 6272 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 6273 const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n"; 6274 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 6275 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 6276 bool is_idle = true, is_eng_idle; 6277 u64 offset; 6278 6279 struct gaudi2_tpc_idle_data tpc_idle_data = { 6280 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 6281 .e = e, 6282 .mask = mask, 6283 .is_idle = &is_idle, 6284 }; 6285 struct iterate_module_ctx tpc_iter = { 6286 .fn = &gaudi2_is_tpc_engine_idle, 6287 .data = &tpc_idle_data, 6288 }; 6289 6290 int engine_idx, i, j; 6291 6292 /* EDMA, Two engines per Dcore */ 6293 if (e) 6294 hl_engine_data_sprintf(e, 6295 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6296 "---- ---- ------- ------------ ----------------------\n"); 6297 6298 for (i = 0; i < NUM_OF_DCORES; i++) { 6299 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 6300 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 6301 6302 if (!(prop->edma_enabled_mask & BIT(seq))) 6303 continue; 6304 6305 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 6306 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6307 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 6308 6309 dma_core_idle_ind_mask = 6310 RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset); 6311 6312 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 6313 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 6314 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 6315 6316 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6317 IS_DMA_IDLE(dma_core_idle_ind_mask); 6318 is_idle &= is_eng_idle; 6319 6320 if (mask && !is_eng_idle) 6321 set_bit(engine_idx, mask); 6322 6323 if (e) 6324 hl_engine_data_sprintf(e, edma_fmt, i, j, 6325 is_eng_idle ? "Y" : "N", 6326 qm_glbl_sts0, 6327 dma_core_idle_ind_mask); 6328 } 6329 } 6330 6331 /* PDMA, Two engines in Full chip */ 6332 if (e) 6333 hl_engine_data_sprintf(e, 6334 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6335 "---- ------- ------------ ----------------------\n"); 6336 6337 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6338 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 6339 offset = i * PDMA_OFFSET; 6340 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset); 6341 6342 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 6343 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 6344 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 6345 6346 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6347 IS_DMA_IDLE(dma_core_idle_ind_mask); 6348 is_idle &= is_eng_idle; 6349 6350 if (mask && !is_eng_idle) 6351 set_bit(engine_idx, mask); 6352 6353 if (e) 6354 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? 
"Y" : "N", 6355 qm_glbl_sts0, dma_core_idle_ind_mask); 6356 } 6357 6358 /* NIC, twelve macros in Full chip */ 6359 if (e && hdev->nic_ports_mask) 6360 hl_engine_data_sprintf(e, 6361 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6362 "--- ------- ------------ ----------\n"); 6363 6364 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6365 if (!(i & 1)) 6366 offset = i / 2 * NIC_OFFSET; 6367 else 6368 offset += NIC_QM_OFFSET; 6369 6370 if (!(hdev->nic_ports_mask & BIT(i))) 6371 continue; 6372 6373 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 6374 6375 6376 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 6377 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 6378 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 6379 6380 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6381 is_idle &= is_eng_idle; 6382 6383 if (mask && !is_eng_idle) 6384 set_bit(engine_idx, mask); 6385 6386 if (e) 6387 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", 6388 qm_glbl_sts0, qm_cgm_sts); 6389 } 6390 6391 if (e) 6392 hl_engine_data_sprintf(e, 6393 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6394 "--- ---- ------- ------------ ---------------\n"); 6395 /* MME, one per Dcore */ 6396 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6397 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 6398 offset = i * DCORE_OFFSET; 6399 6400 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 6401 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 6402 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 6403 6404 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6405 is_idle &= is_eng_idle; 6406 6407 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 6408 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 6409 is_idle &= is_eng_idle; 6410 6411 if (e) 6412 hl_engine_data_sprintf(e, mme_fmt, i, "N", 6413 is_eng_idle ? "Y" : "N", 6414 qm_glbl_sts0, 6415 mme_arch_sts); 6416 6417 if (mask && !is_eng_idle) 6418 set_bit(engine_idx, mask); 6419 } 6420 6421 /* 6422 * TPC 6423 */ 6424 if (e && prop->tpc_enabled_mask) 6425 hl_engine_data_sprintf(e, 6426 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" 6427 "---- --- -------- ------------ ---------- ----------------------\n"); 6428 6429 gaudi2_iterate_tpcs(hdev, &tpc_iter); 6430 6431 /* Decoders, two each Dcore and two shared PCIe decoders */ 6432 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6433 hl_engine_data_sprintf(e, 6434 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 6435 "---- --- ------- ---------------\n"); 6436 6437 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6438 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 6439 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 6440 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 6441 continue; 6442 6443 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 6444 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6445 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 6446 6447 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 6448 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6449 is_idle &= is_eng_idle; 6450 6451 if (mask && !is_eng_idle) 6452 set_bit(engine_idx, mask); 6453 6454 if (e) 6455 hl_engine_data_sprintf(e, dec_fmt, i, j, 6456 is_eng_idle ? 
"Y" : "N", dec_swreg15); 6457 } 6458 } 6459 6460 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6461 hl_engine_data_sprintf(e, 6462 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 6463 "-------- ------- ---------------\n"); 6464 6465 /* Check shared(PCIe) decoders */ 6466 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 6467 dec_enabled_bit = PCIE_DEC_SHIFT + i; 6468 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 6469 continue; 6470 6471 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 6472 offset = i * DCORE_DEC_OFFSET; 6473 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 6474 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 6475 is_idle &= is_eng_idle; 6476 6477 if (mask && !is_eng_idle) 6478 set_bit(engine_idx, mask); 6479 6480 if (e) 6481 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 6482 is_eng_idle ? "Y" : "N", dec_swreg15); 6483 } 6484 6485 if (e) 6486 hl_engine_data_sprintf(e, 6487 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 6488 "---- ---- ------- ------------ ---------- -------------\n"); 6489 6490 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6491 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 6492 6493 offset = i * ROT_OFFSET; 6494 6495 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 6496 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 6497 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 6498 6499 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 6500 is_idle &= is_eng_idle; 6501 6502 if (mask && !is_eng_idle) 6503 set_bit(engine_idx, mask); 6504 6505 if (e) 6506 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 6507 qm_glbl_sts0, qm_cgm_sts, "-"); 6508 } 6509 6510 return is_idle; 6511 } 6512 6513 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 6514 __acquires(&gaudi2->hw_queues_lock) 6515 { 6516 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6517 6518 spin_lock(&gaudi2->hw_queues_lock); 6519 } 6520 6521 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 6522 __releases(&gaudi2->hw_queues_lock) 6523 { 6524 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6525 6526 spin_unlock(&gaudi2->hw_queues_lock); 6527 } 6528 6529 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 6530 { 6531 return hdev->pdev->device; 6532 } 6533 6534 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 6535 { 6536 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6537 6538 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6539 return 0; 6540 6541 return hl_fw_get_eeprom_data(hdev, data, max_size); 6542 } 6543 6544 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 6545 { 6546 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 6547 } 6548 6549 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 6550 { 6551 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6552 6553 if (aggregate) { 6554 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 6555 return gaudi2->events_stat_aggregate; 6556 } 6557 6558 *size = (u32) sizeof(gaudi2->events_stat); 6559 return gaudi2->events_stat; 6560 } 6561 6562 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 6563 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6564 { 6565 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 6566 dcore_vdec_id + DCORE_OFFSET * dcore_id; 6567 6568 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6569 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6570 6571 
WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6572 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6573 6574 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6575 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6576 6577 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6578 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6579 6580 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6581 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6582 } 6583 6584 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 6585 { 6586 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6587 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6588 struct asic_fixed_properties *prop = &hdev->asic_prop; 6589 u32 dcore_offset = dcore_id * DCORE_OFFSET; 6590 u32 vdec_id, i, ports_offset, reg_val; 6591 u8 edma_seq_base; 6592 6593 /* EDMA */ 6594 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 6595 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 6596 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6597 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6598 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6599 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6600 } 6601 6602 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 6603 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6604 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6605 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 6606 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 6607 } 6608 6609 /* Sync Mngr */ 6610 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 6611 /* 6612 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 6613 * for any access type 6614 */ 6615 if (dcore_id > 0) { 6616 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 6617 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 6618 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 6619 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 6620 } 6621 6622 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 6623 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 6624 6625 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 6626 ports_offset = i * DCORE_MME_SBTE_OFFSET; 6627 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 6628 dcore_offset + ports_offset, 0); 6629 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 6630 dcore_offset + ports_offset, rw_asid); 6631 } 6632 6633 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 6634 ports_offset = i * DCORE_MME_WB_OFFSET; 6635 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 6636 dcore_offset + ports_offset, 0); 6637 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 6638 dcore_offset + ports_offset, rw_asid); 6639 } 6640 6641 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 6642 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 6643 6644 /* 6645 * Decoders 6646 */ 6647 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 6648 if (prop->decoder_enabled_mask & 
BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 6649 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 6650 } 6651 } 6652 6653 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 6654 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 6655 { 6656 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 6657 6658 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 6659 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 6660 6661 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 6662 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 6663 6664 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 6665 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 6666 6667 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 6668 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 6669 6670 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 6671 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 6672 } 6673 6674 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 6675 u32 rw_asid, u32 rw_mmu_bp) 6676 { 6677 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 6678 6679 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 6680 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 6681 } 6682 6683 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 6684 { 6685 u32 reg_base, reg_offset, reg_val = 0; 6686 6687 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 6688 6689 /* Enable MMU and configure asid for all relevant ARC regions */ 6690 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 6691 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 6692 6693 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 6694 WREG32(reg_base + reg_offset, reg_val); 6695 6696 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 6697 WREG32(reg_base + reg_offset, reg_val); 6698 6699 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 6700 WREG32(reg_base + reg_offset, reg_val); 6701 6702 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 6703 WREG32(reg_base + reg_offset, reg_val); 6704 6705 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 6706 WREG32(reg_base + reg_offset, reg_val); 6707 6708 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 6709 WREG32(reg_base + reg_offset, reg_val); 6710 6711 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 6712 WREG32(reg_base + reg_offset, reg_val); 6713 6714 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 6715 WREG32(reg_base + reg_offset, reg_val); 6716 6717 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 6718 WREG32(reg_base + reg_offset, reg_val); 6719 6720 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 6721 WREG32(reg_base + reg_offset, reg_val); 6722 6723 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 6724 WREG32(reg_base + reg_offset, reg_val); 6725 } 6726 6727 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 6728 { 6729 int i; 6730 6731 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 6732 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 6733 6734 for (i = CPU_ID_SCHED_ARC0 ; i < 
NUM_OF_ARC_FARMS_ARC ; i++) 6735 gaudi2_arc_mmu_prepare(hdev, i, asid); 6736 6737 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 6738 if (!gaudi2_is_queue_enabled(hdev, i)) 6739 continue; 6740 6741 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 6742 } 6743 6744 return 0; 6745 } 6746 6747 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 6748 { 6749 struct asic_fixed_properties *prop = &hdev->asic_prop; 6750 u32 rw_asid, offset; 6751 int rc, i; 6752 6753 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 6754 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 6755 6756 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6757 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6758 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6759 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6760 6761 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 6762 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 6763 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 6764 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 6765 6766 /* ROT */ 6767 for (i = 0 ; i < NUM_OF_ROT ; i++) { 6768 offset = i * ROT_OFFSET; 6769 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 6770 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6771 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 6772 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 6773 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 6774 } 6775 6776 /* Shared Decoders are the last bits in the decoders mask */ 6777 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 6778 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 6779 6780 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 6781 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 6782 6783 /* arc farm arc dup eng */ 6784 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 6785 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 6786 6787 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 6788 if (rc) 6789 return rc; 6790 6791 return 0; 6792 } 6793 6794 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 6795 struct iterate_module_ctx *ctx) 6796 { 6797 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 6798 6799 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 6800 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 6801 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 6802 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 6803 } 6804 6805 /* zero the MMUBP and set the ASID */ 6806 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 6807 { 6808 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6809 struct gaudi2_tpc_mmu_data tpc_mmu_data; 6810 struct iterate_module_ctx tpc_iter = { 6811 .fn = &gaudi2_tpc_mmu_prepare, 6812 .data = &tpc_mmu_data, 6813 }; 6814 int rc, i; 6815 6816 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 6817 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6818 return -EINVAL; 6819 } 6820 6821 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 6822 return 0; 6823 6824 rc = gaudi2_mmu_shared_prepare(hdev, asid); 6825 if (rc) 6826 return rc; 6827 6828 /* configure DCORE MMUs */ 6829 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6830 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6831 
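/* The packed RD/WR ASID value is written by gaudi2_tpc_mmu_prepare() to each TPC's CFG and QM AXUSER registers */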
gaudi2_iterate_tpcs(hdev, &tpc_iter); 6832 for (i = 0 ; i < NUM_OF_DCORES ; i++) 6833 gaudi2_mmu_dcore_prepare(hdev, i, asid); 6834 6835 return 0; 6836 } 6837 6838 static inline bool is_info_event(u32 event) 6839 { 6840 switch (event) { 6841 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 6842 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 6843 6844 /* return in case of NIC status event - these events are received periodically and not as 6845 * an indication to an error. 6846 */ 6847 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: 6848 return true; 6849 default: 6850 return false; 6851 } 6852 } 6853 6854 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, 6855 bool ratelimited, const char *fmt, ...) 6856 { 6857 struct va_format vaf; 6858 va_list args; 6859 6860 va_start(args, fmt); 6861 vaf.fmt = fmt; 6862 vaf.va = &args; 6863 6864 if (ratelimited) 6865 dev_err_ratelimited(hdev->dev, "%s: %pV\n", 6866 gaudi2_irq_map_table[event_type].valid ? 6867 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 6868 else 6869 dev_err(hdev->dev, "%s: %pV\n", 6870 gaudi2_irq_map_table[event_type].valid ? 6871 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 6872 6873 va_end(args); 6874 } 6875 6876 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, 6877 struct hl_eq_ecc_data *ecc_data) 6878 { 6879 u64 ecc_address = 0, ecc_syndrom = 0; 6880 u8 memory_wrapper_idx = 0; 6881 6882 ecc_address = le64_to_cpu(ecc_data->ecc_address); 6883 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 6884 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 6885 6886 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, 6887 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. 
critical %u.\n", 6888 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 6889 6890 return !!ecc_data->is_critical; 6891 } 6892 6893 /* 6894 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6895 * 6896 * @idx: the current pi/ci value 6897 * @q_len: the queue length (power of 2) 6898 * 6899 * @return the cyclically decremented index 6900 */ 6901 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 6902 { 6903 u32 mask = q_len - 1; 6904 6905 /* 6906 * modular decrement is equivalent to adding (queue_len - 1) 6907 * later we take LSBs to make sure the value is in the 6908 * range [0, queue_len - 1] 6909 */ 6910 return (idx + q_len - 1) & mask; 6911 } 6912 6913 /** 6914 * gaudi2_print_sw_config_stream_data - print SW config stream data 6915 * 6916 * @hdev: pointer to the habanalabs device structure 6917 * @stream: the QMAN's stream 6918 * @qman_base: base address of QMAN registers block 6919 */ 6920 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 6921 u32 stream, u64 qman_base) 6922 { 6923 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6924 u32 cq_ptr_lo_off, size; 6925 6926 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 6927 6928 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 6929 stream * cq_ptr_lo_off; 6930 6931 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6932 6933 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 6934 6935 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6936 size = RREG32(cq_tsize); 6937 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 6938 stream, cq_ptr, size); 6939 } 6940 6941 /** 6942 * gaudi2_print_last_pqes_on_err - print last PQEs on error 6943 * 6944 * @hdev: pointer to the habanalabs device structure 6945 * @qid_base: first QID of the QMAN (out of 4 streams) 6946 * @stream: the QMAN's stream 6947 * @qman_base: base address of QMAN registers block 6948 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6949 */ 6950 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 6951 u64 qman_base, bool pr_sw_conf) 6952 { 6953 u32 ci, qm_ci_stream_off; 6954 struct hl_hw_queue *q; 6955 u64 pq_ci; 6956 int i; 6957 6958 q = &hdev->kernel_queues[qid_base + stream]; 6959 6960 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 6961 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 6962 stream * qm_ci_stream_off; 6963 6964 hdev->asic_funcs->hw_queues_lock(hdev); 6965 6966 if (pr_sw_conf) 6967 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 6968 6969 ci = RREG32(pq_ci); 6970 6971 /* we should start printing from ci - 1 */ 6972 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6973 6974 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6975 struct hl_bd *bd; 6976 u64 addr; 6977 u32 len; 6978 6979 bd = q->kernel_address; 6980 bd += ci; 6981 6982 len = le32_to_cpu(bd->len); 6983 /* len 0 means uninitialized entry - break */ 6984 if (!len) 6985 break; 6986 6987 addr = le64_to_cpu(bd->ptr); 6988 6989 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 6990 stream, ci, addr, len); 6991 6992 /* get previous ci, wrap if needed */ 6993 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 6994 } 6995 6996 hdev->asic_funcs->hw_queues_unlock(hdev); 6997 } 6998 6999 /** 7000 * print_qman_data_on_err - extract QMAN data on
error 7001 * 7002 * @hdev: pointer to the habanalabs device structure 7003 * @qid_base: first QID of the QMAN (out of 4 streams) 7004 * @stream: the QMAN's stream 7005 * @qman_base: base address of QMAN registers block 7006 * 7007 * This function attempts to extract as much data as possible on a QMAN error. 7008 * On upper CP print the SW config stream data and last 8 PQEs. 7009 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 7010 */ 7011 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base) 7012 { 7013 u32 i; 7014 7015 if (stream != QMAN_STREAMS) { 7016 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true); 7017 return; 7018 } 7019 7020 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7021 7022 for (i = 0 ; i < QMAN_STREAMS ; i++) 7023 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false); 7024 } 7025 7026 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, 7027 u64 qman_base, u32 qid_base) 7028 { 7029 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; 7030 u64 glbl_sts_addr, arb_err_addr; 7031 char reg_desc[32]; 7032 7033 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE); 7034 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE); 7035 7036 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */ 7037 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7038 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7039 7040 if (!glbl_sts_val) 7041 continue; 7042 7043 if (i == QMAN_STREAMS) { 7044 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7045 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE; 7046 } else { 7047 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7048 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE; 7049 } 7050 7051 for (j = 0 ; j < num_error_causes ; j++) 7052 if (glbl_sts_val & BIT(j)) { 7053 gaudi2_print_event(hdev, event_type, true, 7054 "%s. err cause: %s", reg_desc, 7055 i == QMAN_STREAMS ? 7056 gaudi2_qman_lower_cp_error_cause[j] : 7057 gaudi2_qman_error_cause[j]); 7058 error_count++; 7059 } 7060 7061 print_qman_data_on_err(hdev, qid_base, i, qman_base); 7062 } 7063 7064 arb_err_val = RREG32(arb_err_addr); 7065 7066 if (!arb_err_val) 7067 goto out; 7068 7069 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7070 if (arb_err_val & BIT(j)) { 7071 gaudi2_print_event(hdev, event_type, true, 7072 "ARB_ERR.
err cause: %s", 7073 gaudi2_qman_arb_error_cause[j]); 7074 error_count++; 7075 } 7076 } 7077 7078 out: 7079 return error_count; 7080 } 7081 7082 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7083 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7084 enum gaudi2_engine_id id, u64 *event_mask) 7085 { 7086 u32 razwi_hi, razwi_lo, razwi_xy; 7087 u16 eng_id = id; 7088 u8 rd_wr_flag; 7089 7090 if (is_write) { 7091 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7092 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7093 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7094 rd_wr_flag = HL_RAZWI_WRITE; 7095 } else { 7096 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7097 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7098 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7099 rd_wr_flag = HL_RAZWI_READ; 7100 } 7101 7102 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7103 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7104 7105 dev_err_ratelimited(hdev->dev, 7106 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7107 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7108 } 7109 7110 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7111 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7112 enum gaudi2_engine_id id, u64 *event_mask) 7113 { 7114 u32 razwi_addr, razwi_xy; 7115 u16 eng_id = id; 7116 u8 rd_wr_flag; 7117 7118 if (is_write) { 7119 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7120 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7121 rd_wr_flag = HL_RAZWI_WRITE; 7122 } else { 7123 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7124 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7125 rd_wr_flag = HL_RAZWI_READ; 7126 } 7127 7128 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7129 dev_err_ratelimited(hdev->dev, 7130 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", 7131 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7132 razwi_xy); 7133 } 7134 7135 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7136 enum razwi_event_sources module, u8 module_idx) 7137 { 7138 switch (module) { 7139 case RAZWI_TPC: 7140 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7141 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7142 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7143 (module_idx % NUM_OF_TPC_PER_DCORE) + 7144 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7145 7146 case RAZWI_MME: 7147 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 7148 (module_idx * ENGINE_ID_DCORE_OFFSET)); 7149 7150 case RAZWI_EDMA: 7151 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7152 (module_idx % NUM_OF_EDMA_PER_DCORE)); 7153 7154 case RAZWI_PDMA: 7155 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 7156 7157 case RAZWI_NIC: 7158 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 7159 7160 case RAZWI_DEC: 7161 if (module_idx == 8) 7162 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 7163 7164 if (module_idx == 9) 7165 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 7166 ; 7167 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7168 (module_idx % NUM_OF_DEC_PER_DCORE) + 7169 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7170 7171 case RAZWI_ROT: 7172 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 7173 7174 default: 7175 return GAUDI2_ENGINE_ID_SIZE; 7176 } 7177 } 7178 7179 /* 7180 * This function handles RR(Range register) hit events. 7181 * raised be initiators not PSOC RAZWI. 7182 */ 7183 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 7184 enum razwi_event_sources module, u8 module_idx, 7185 u8 module_sub_idx, u64 *event_mask) 7186 { 7187 bool via_sft = false; 7188 u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id; 7189 u64 rtr_mstr_if_base_addr; 7190 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 7191 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 7192 char initiator_name[64]; 7193 7194 switch (module) { 7195 case RAZWI_TPC: 7196 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx]; 7197 sprintf(initiator_name, "TPC_%u", module_idx); 7198 break; 7199 case RAZWI_MME: 7200 sprintf(initiator_name, "MME_%u", module_idx); 7201 switch (module_sub_idx) { 7202 case MME_WAP0: 7203 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 7204 break; 7205 case MME_WAP1: 7206 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 7207 break; 7208 case MME_WRITE: 7209 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 7210 break; 7211 case MME_READ: 7212 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 7213 break; 7214 case MME_SBTE0: 7215 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 7216 break; 7217 case MME_SBTE1: 7218 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 7219 break; 7220 case MME_SBTE2: 7221 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 7222 break; 7223 case MME_SBTE3: 7224 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 7225 break; 7226 case MME_SBTE4: 7227 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 7228 break; 7229 default: 7230 return; 7231 } 7232 break; 7233 case RAZWI_EDMA: 7234 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id; 7235 dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id; 7236 via_sft = true; 7237 sprintf(initiator_name, "EDMA_%u", module_idx); 7238 break; 7239 case RAZWI_PDMA: 7240 rtr_id = 
gaudi2_pdma_initiator_rtr_id[module_idx]; 7241 sprintf(initiator_name, "PDMA_%u", module_idx); 7242 break; 7243 case RAZWI_NIC: 7244 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx]; 7245 sprintf(initiator_name, "NIC_%u", module_idx); 7246 break; 7247 case RAZWI_DEC: 7248 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx]; 7249 sprintf(initiator_name, "DEC_%u", module_idx); 7250 break; 7251 case RAZWI_ROT: 7252 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx]; 7253 sprintf(initiator_name, "ROT_%u", module_idx); 7254 break; 7255 default: 7256 return; 7257 } 7258 7259 /* Find router mstr_if register base */ 7260 if (via_sft) { 7261 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + 7262 dcore_id * SFT_DCORE_OFFSET + 7263 sft_id * SFT_IF_OFFSET + 7264 RTR_MSTR_IF_OFFSET; 7265 } else { 7266 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7267 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7268 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 7269 dcore_id * DCORE_OFFSET + 7270 dcore_rtr_id * DCORE_RTR_OFFSET + 7271 RTR_MSTR_IF_OFFSET; 7272 } 7273 7274 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 7275 hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 7276 hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 7277 7278 if (via_sft) { 7279 /* SFT has separate MSTR_IF for LBW, only there we can 7280 * read the LBW razwi related registers 7281 */ 7282 u64 base; 7283 7284 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET + 7285 RTR_LBW_MSTR_IF_OFFSET; 7286 7287 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7288 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7289 } else { 7290 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 7291 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 7292 } 7293 7294 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 7295 if (hbw_shrd_aw) { 7296 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7297 initiator_name, eng_id, event_mask); 7298 7299 /* Clear event indication */ 7300 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 7301 } 7302 7303 if (hbw_shrd_ar) { 7304 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7305 initiator_name, eng_id, event_mask); 7306 7307 /* Clear event indication */ 7308 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 7309 } 7310 7311 if (lbw_shrd_aw) { 7312 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, 7313 initiator_name, eng_id, event_mask); 7314 7315 /* Clear event indication */ 7316 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 7317 } 7318 7319 if (lbw_shrd_ar) { 7320 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, 7321 initiator_name, eng_id, event_mask); 7322 7323 /* Clear event indication */ 7324 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar); 7325 } 7326 } 7327 7328 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) 7329 { 7330 struct asic_fixed_properties *prop = &hdev->asic_prop; 7331 u8 mod_idx, sub_mod; 7332 7333 /* check all TPCs */ 7334 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 7335 if (prop->tpc_enabled_mask & BIT(mod_idx)) 7336 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL); 7337 } 7338 7339 /* check all MMEs */ 7340 for (mod_idx = 0 ; mod_idx < 
(NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7341 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 7342 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 7343 sub_mod, NULL); 7344 7345 /* check all EDMAs */ 7346 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 7347 if (prop->edma_enabled_mask & BIT(mod_idx)) 7348 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL); 7349 7350 /* check all PDMAs */ 7351 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 7352 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL); 7353 7354 /* check all NICs */ 7355 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 7356 if (hdev->nic_ports_mask & BIT(mod_idx)) 7357 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 7358 NULL); 7359 7360 /* check all DECs */ 7361 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 7362 if (prop->decoder_enabled_mask & BIT(mod_idx)) 7363 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL); 7364 7365 /* check all ROTs */ 7366 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 7367 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); 7368 } 7369 7370 static const char *gaudi2_get_initiators_name(u32 rtr_id) 7371 { 7372 switch (rtr_id) { 7373 case DCORE0_RTR0: 7374 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; 7375 case DCORE0_RTR1: 7376 return "TPC0/1"; 7377 case DCORE0_RTR2: 7378 return "TPC2/3"; 7379 case DCORE0_RTR3: 7380 return "TPC4/5"; 7381 case DCORE0_RTR4: 7382 return "MME0_SBTE0/1"; 7383 case DCORE0_RTR5: 7384 return "MME0_WAP0/SBTE2"; 7385 case DCORE0_RTR6: 7386 return "MME0_CTRL_WR/SBTE3"; 7387 case DCORE0_RTR7: 7388 return "MME0_WAP1/CTRL_RD/SBTE4"; 7389 case DCORE1_RTR0: 7390 return "MME1_WAP1/CTRL_RD/SBTE4"; 7391 case DCORE1_RTR1: 7392 return "MME1_CTRL_WR/SBTE3"; 7393 case DCORE1_RTR2: 7394 return "MME1_WAP0/SBTE2"; 7395 case DCORE1_RTR3: 7396 return "MME1_SBTE0/1"; 7397 case DCORE1_RTR4: 7398 return "TPC10/11"; 7399 case DCORE1_RTR5: 7400 return "TPC8/9"; 7401 case DCORE1_RTR6: 7402 return "TPC6/7"; 7403 case DCORE1_RTR7: 7404 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; 7405 case DCORE2_RTR0: 7406 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; 7407 case DCORE2_RTR1: 7408 return "TPC16/17"; 7409 case DCORE2_RTR2: 7410 return "TPC14/15"; 7411 case DCORE2_RTR3: 7412 return "TPC12/13"; 7413 case DCORE2_RTR4: 7414 return "MME2_SBTE0/1"; 7415 case DCORE2_RTR5: 7416 return "MME2_WAP0/SBTE2"; 7417 case DCORE2_RTR6: 7418 return "MME2_CTRL_WR/SBTE3"; 7419 case DCORE2_RTR7: 7420 return "MME2_WAP1/CTRL_RD/SBTE4"; 7421 case DCORE3_RTR0: 7422 return "MME3_WAP1/CTRL_RD/SBTE4"; 7423 case DCORE3_RTR1: 7424 return "MME3_CTRL_WR/SBTE3"; 7425 case DCORE3_RTR2: 7426 return "MME3_WAP0/SBTE2"; 7427 case DCORE3_RTR3: 7428 return "MME3_SBTE0/1"; 7429 case DCORE3_RTR4: 7430 return "TPC18/19"; 7431 case DCORE3_RTR5: 7432 return "TPC20/21"; 7433 case DCORE3_RTR6: 7434 return "TPC22/23"; 7435 case DCORE3_RTR7: 7436 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; 7437 default: 7438 return "N/A"; 7439 } 7440 } 7441 7442 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) 7443 { 7444 switch (rtr_id) { 7445 case DCORE0_RTR0: 7446 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; 7447 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; 7448 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; 7449 engines[3] = 
GAUDI2_PCIE_ENGINE_ID_DEC_1; 7450 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7451 engines[5] = GAUDI2_ENGINE_ID_PDMA_0; 7452 engines[6] = GAUDI2_ENGINE_ID_PDMA_1; 7453 engines[7] = GAUDI2_ENGINE_ID_PCIE; 7454 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; 7455 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; 7456 engines[10] = GAUDI2_ENGINE_ID_PSOC; 7457 return 11; 7458 7459 case DCORE0_RTR1: 7460 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; 7461 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; 7462 return 2; 7463 7464 case DCORE0_RTR2: 7465 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; 7466 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3; 7467 return 2; 7468 7469 case DCORE0_RTR3: 7470 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; 7471 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; 7472 return 2; 7473 7474 case DCORE0_RTR4: 7475 case DCORE0_RTR5: 7476 case DCORE0_RTR6: 7477 case DCORE0_RTR7: 7478 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; 7479 return 1; 7480 7481 case DCORE1_RTR0: 7482 case DCORE1_RTR1: 7483 case DCORE1_RTR2: 7484 case DCORE1_RTR3: 7485 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; 7486 return 1; 7487 7488 case DCORE1_RTR4: 7489 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; 7490 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; 7491 return 2; 7492 7493 case DCORE1_RTR5: 7494 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; 7495 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; 7496 return 2; 7497 7498 case DCORE1_RTR6: 7499 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; 7500 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; 7501 return 2; 7502 7503 case DCORE1_RTR7: 7504 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; 7505 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; 7506 engines[2] = GAUDI2_ENGINE_ID_NIC0_0; 7507 engines[3] = GAUDI2_ENGINE_ID_NIC1_0; 7508 engines[4] = GAUDI2_ENGINE_ID_NIC2_0; 7509 engines[5] = GAUDI2_ENGINE_ID_NIC3_0; 7510 engines[6] = GAUDI2_ENGINE_ID_NIC4_0; 7511 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; 7512 engines[8] = GAUDI2_ENGINE_ID_KDMA; 7513 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; 7514 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; 7515 return 11; 7516 7517 case DCORE2_RTR0: 7518 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; 7519 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1; 7520 engines[2] = GAUDI2_ENGINE_ID_NIC5_0; 7521 engines[3] = GAUDI2_ENGINE_ID_NIC6_0; 7522 engines[4] = GAUDI2_ENGINE_ID_NIC7_0; 7523 engines[5] = GAUDI2_ENGINE_ID_NIC8_0; 7524 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; 7525 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; 7526 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7527 return 9; 7528 7529 case DCORE2_RTR1: 7530 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; 7531 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; 7532 return 2; 7533 7534 case DCORE2_RTR2: 7535 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; 7536 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; 7537 return 2; 7538 7539 case DCORE2_RTR3: 7540 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; 7541 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; 7542 return 2; 7543 7544 case DCORE2_RTR4: 7545 case DCORE2_RTR5: 7546 case DCORE2_RTR6: 7547 case DCORE2_RTR7: 7548 engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; 7549 return 1; 7550 case DCORE3_RTR0: 7551 case DCORE3_RTR1: 7552 case DCORE3_RTR2: 7553 case DCORE3_RTR3: 7554 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; 7555 return 1; 7556 case DCORE3_RTR4: 7557 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; 7558 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; 7559 return 2; 7560 case DCORE3_RTR5: 7561 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; 7562 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; 7563 return 2; 7564 case 
DCORE3_RTR6: 7565 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; 7566 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; 7567 return 2; 7568 case DCORE3_RTR7: 7569 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; 7570 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; 7571 engines[2] = GAUDI2_ENGINE_ID_NIC9_0; 7572 engines[3] = GAUDI2_ENGINE_ID_NIC10_0; 7573 engines[4] = GAUDI2_ENGINE_ID_NIC11_0; 7574 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; 7575 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; 7576 engines[7] = GAUDI2_ENGINE_ID_ROT_1; 7577 engines[8] = GAUDI2_ENGINE_ID_ROT_0; 7578 return 9; 7579 default: 7580 return 0; 7581 } 7582 } 7583 7584 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7585 u64 rtr_ctrl_base_addr, bool is_write, 7586 u64 *event_mask) 7587 { 7588 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7589 u32 razwi_hi, razwi_lo; 7590 u8 rd_wr_flag; 7591 7592 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7593 7594 if (is_write) { 7595 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); 7596 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); 7597 rd_wr_flag = HL_RAZWI_WRITE; 7598 7599 /* Clear set indication */ 7600 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); 7601 } else { 7602 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); 7603 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); 7604 rd_wr_flag = HL_RAZWI_READ; 7605 7606 /* Clear set indication */ 7607 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); 7608 } 7609 7610 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, 7611 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7612 dev_err_ratelimited(hdev->dev, 7613 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", 7614 is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); 7615 7616 dev_err_ratelimited(hdev->dev, 7617 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7618 } 7619 7620 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7621 u64 rtr_ctrl_base_addr, bool is_write, 7622 u64 *event_mask) 7623 { 7624 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; 7625 u32 razwi_addr; 7626 u8 rd_wr_flag; 7627 7628 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); 7629 7630 if (is_write) { 7631 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); 7632 rd_wr_flag = HL_RAZWI_WRITE; 7633 7634 /* Clear set indication */ 7635 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); 7636 } else { 7637 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); 7638 rd_wr_flag = HL_RAZWI_READ; 7639 7640 /* Clear set indication */ 7641 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); 7642 } 7643 7644 hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, 7645 event_mask); 7646 dev_err_ratelimited(hdev->dev, 7647 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", 7648 is_write ? 
"WR" : "RD", rtr_id, razwi_addr); 7649 7650 dev_err_ratelimited(hdev->dev, 7651 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7652 } 7653 7654 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 7655 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 7656 { 7657 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, 7658 razwi_mask_info, razwi_intr = 0, error_count = 0; 7659 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES; 7660 u64 rtr_ctrl_base_addr; 7661 7662 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 7663 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 7664 if (!razwi_intr) 7665 return 0; 7666 } 7667 7668 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 7669 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); 7670 7671 dev_err_ratelimited(hdev->dev, 7672 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7673 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 7674 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 7675 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 7676 xy, 7677 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 7678 7679 if (xy == 0) { 7680 dev_err_ratelimited(hdev->dev, 7681 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); 7682 goto clear; 7683 } 7684 7685 /* Find router id by router coordinates */ 7686 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++) 7687 if (rtr_coordinates_to_rtr_id[rtr_id] == xy) 7688 break; 7689 7690 if (rtr_id == rtr_map_arr_len) { 7691 dev_err_ratelimited(hdev->dev, 7692 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy); 7693 goto clear; 7694 } 7695 7696 /* Find router mstr_if register base */ 7697 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; 7698 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; 7699 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET + 7700 dcore_rtr_id * DCORE_RTR_OFFSET; 7701 7702 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET); 7703 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET); 7704 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET); 7705 lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); 7706 7707 if (hbw_aw_set) 7708 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7709 rtr_ctrl_base_addr, true, event_mask); 7710 7711 if (hbw_ar_set) 7712 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7713 rtr_ctrl_base_addr, false, event_mask); 7714 7715 if (lbw_aw_set) 7716 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7717 rtr_ctrl_base_addr, true, event_mask); 7718 7719 if (lbw_ar_set) 7720 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7721 rtr_ctrl_base_addr, false, event_mask); 7722 7723 error_count++; 7724 7725 clear: 7726 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 7727 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 7728 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 7729 7730 return error_count; 7731 } 7732 7733 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 7734 { 7735 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7736 7737 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 7738 7739 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 7740 if (sts_val & BIT(i)) { 7741 
gaudi2_print_event(hdev, event_type, true, 7742 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 7743 sts_clr_val |= BIT(i); 7744 error_count++; 7745 } 7746 } 7747 7748 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 7749 7750 return error_count; 7751 } 7752 7753 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 7754 bool extended_err_check, u64 *event_mask) 7755 { 7756 enum razwi_event_sources module; 7757 u32 error_count = 0; 7758 u64 qman_base; 7759 u8 index; 7760 7761 switch (event_type) { 7762 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 7763 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 7764 qman_base = mmDCORE0_TPC0_QM_BASE + 7765 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 7766 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 7767 module = RAZWI_TPC; 7768 break; 7769 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 7770 qman_base = mmDCORE0_TPC6_QM_BASE; 7771 module = RAZWI_TPC; 7772 break; 7773 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 7774 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 7775 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 7776 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 7777 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 7778 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 7779 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 7780 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 7781 module = RAZWI_MME; 7782 break; 7783 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 7784 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 7785 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 7786 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 7787 module = RAZWI_PDMA; 7788 break; 7789 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 7790 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 7791 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 7792 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 7793 module = RAZWI_ROT; 7794 break; 7795 default: 7796 return 0; 7797 } 7798 7799 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7800 7801 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 7802 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 7803 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 7804 error_count += _gaudi2_handle_qm_sei_err(hdev, 7805 qman_base + NIC_QM_OFFSET, event_type); 7806 7807 if (extended_err_check) 7808 /* check if RAZWI happened */ 7809 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask); 7810 7811 return error_count; 7812 } 7813 7814 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type) 7815 { 7816 u32 qid_base, error_count = 0; 7817 u64 qman_base; 7818 u8 index; 7819 7820 switch (event_type) { 7821 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 7822 index = event_type - GAUDI2_EVENT_TPC0_QM; 7823 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 7824 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7825 break; 7826 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 7827 index = event_type - GAUDI2_EVENT_TPC6_QM; 7828 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 7829 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7830 break; 7831 case GAUDI2_EVENT_TPC12_QM ... 
GAUDI2_EVENT_TPC17_QM: 7832 index = event_type - GAUDI2_EVENT_TPC12_QM; 7833 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 7834 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7835 break; 7836 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM: 7837 index = event_type - GAUDI2_EVENT_TPC18_QM; 7838 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 7839 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 7840 break; 7841 case GAUDI2_EVENT_TPC24_QM: 7842 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 7843 qman_base = mmDCORE0_TPC6_QM_BASE; 7844 break; 7845 case GAUDI2_EVENT_MME0_QM: 7846 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 7847 qman_base = mmDCORE0_MME_QM_BASE; 7848 break; 7849 case GAUDI2_EVENT_MME1_QM: 7850 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 7851 qman_base = mmDCORE1_MME_QM_BASE; 7852 break; 7853 case GAUDI2_EVENT_MME2_QM: 7854 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 7855 qman_base = mmDCORE2_MME_QM_BASE; 7856 break; 7857 case GAUDI2_EVENT_MME3_QM: 7858 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 7859 qman_base = mmDCORE3_MME_QM_BASE; 7860 break; 7861 case GAUDI2_EVENT_HDMA0_QM: 7862 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 7863 qman_base = mmDCORE0_EDMA0_QM_BASE; 7864 break; 7865 case GAUDI2_EVENT_HDMA1_QM: 7866 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 7867 qman_base = mmDCORE0_EDMA1_QM_BASE; 7868 break; 7869 case GAUDI2_EVENT_HDMA2_QM: 7870 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 7871 qman_base = mmDCORE1_EDMA0_QM_BASE; 7872 break; 7873 case GAUDI2_EVENT_HDMA3_QM: 7874 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 7875 qman_base = mmDCORE1_EDMA1_QM_BASE; 7876 break; 7877 case GAUDI2_EVENT_HDMA4_QM: 7878 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 7879 qman_base = mmDCORE2_EDMA0_QM_BASE; 7880 break; 7881 case GAUDI2_EVENT_HDMA5_QM: 7882 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 7883 qman_base = mmDCORE2_EDMA1_QM_BASE; 7884 break; 7885 case GAUDI2_EVENT_HDMA6_QM: 7886 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 7887 qman_base = mmDCORE3_EDMA0_QM_BASE; 7888 break; 7889 case GAUDI2_EVENT_HDMA7_QM: 7890 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 7891 qman_base = mmDCORE3_EDMA1_QM_BASE; 7892 break; 7893 case GAUDI2_EVENT_PDMA0_QM: 7894 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 7895 qman_base = mmPDMA0_QM_BASE; 7896 break; 7897 case GAUDI2_EVENT_PDMA1_QM: 7898 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 7899 qman_base = mmPDMA1_QM_BASE; 7900 break; 7901 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 7902 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 7903 qman_base = mmROT0_QM_BASE; 7904 break; 7905 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 7906 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 7907 qman_base = mmROT1_QM_BASE; 7908 break; 7909 default: 7910 return 0; 7911 } 7912 7913 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 7914 7915 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 7916 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) 7917 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 7918 7919 return error_count; 7920 } 7921 7922 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 7923 { 7924 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7925 7926 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); 7927 7928 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 7929 if (sts_val & BIT(i)) { 7930 gaudi2_print_event(hdev, event_type, true, 7931 "err cause: %s", gaudi2_arc_sei_error_cause[i]); 
7932 sts_clr_val |= BIT(i); 7933 error_count++; 7934 } 7935 } 7936 7937 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); 7938 7939 return error_count; 7940 } 7941 7942 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 7943 { 7944 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 7945 7946 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 7947 7948 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 7949 if (sts_val & BIT(i)) { 7950 gaudi2_print_event(hdev, event_type, true, 7951 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 7952 sts_clr_val |= BIT(i); 7953 error_count++; 7954 } 7955 } 7956 7957 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 7958 7959 return error_count; 7960 } 7961 7962 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 7963 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 7964 u64 *event_mask) 7965 { 7966 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 7967 u32 error_count = 0; 7968 int i; 7969 7970 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 7971 if (intr_cause_data & BIT(i)) { 7972 gaudi2_print_event(hdev, event_type, true, 7973 "err cause: %s", guadi2_rot_error_cause[i]); 7974 error_count++; 7975 } 7976 7977 /* check if RAZWI happened */ 7978 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask); 7979 7980 return error_count; 7981 } 7982 7983 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 7984 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 7985 u64 *event_mask) 7986 { 7987 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 7988 u32 error_count = 0; 7989 int i; 7990 7991 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 7992 if (intr_cause_data & BIT(i)) { 7993 gaudi2_print_event(hdev, event_type, true, 7994 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 7995 error_count++; 7996 } 7997 7998 /* check if RAZWI happened */ 7999 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask); 8000 8001 return error_count; 8002 } 8003 8004 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8005 u64 *event_mask) 8006 { 8007 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8008 int i; 8009 8010 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8011 /* DCORE DEC */ 8012 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8013 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8014 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8015 else 8016 /* PCIE DEC */ 8017 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8018 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8019 8020 sts_val = RREG32(sts_addr); 8021 8022 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8023 if (sts_val & BIT(i)) { 8024 gaudi2_print_event(hdev, event_type, true, 8025 "err cause: %s", gaudi2_dec_error_cause[i]); 8026 sts_clr_val |= BIT(i); 8027 error_count++; 8028 } 8029 } 8030 8031 /* check if RAZWI happened */ 8032 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask); 8033 8034 /* Write 1 clear errors */ 8035 WREG32(sts_addr, sts_clr_val); 8036 8037 return error_count; 8038 } 8039 8040 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8041 u64 *event_mask) 8042 { 8043 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8044 int i; 8045 8046 sts_addr = 
mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened */
	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}

static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	int i, error_count = 0;

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
			error_count++;
		}

	return error_count;
}

static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
	int i;

	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_wap_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened on WAP0/1 */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}

static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	/* If an AXI read or write error is received, an error is reported and an
	 * interrupt message is sent. Due to a HW erratum, when reading the cause
	 * register of the KDMA engine, the reported error is always HBW even if
	 * the actual error was caused by an LBW KDMA transaction.
8123 */ 8124 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8125 if (intr_cause_data & BIT(i)) { 8126 gaudi2_print_event(hdev, event_type, true, 8127 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]); 8128 error_count++; 8129 } 8130 8131 return error_count; 8132 } 8133 8134 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, 8135 u64 intr_cause_data) 8136 { 8137 u32 error_count = 0; 8138 int i; 8139 8140 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8141 if (intr_cause_data & BIT(i)) { 8142 gaudi2_print_event(hdev, event_type, true, 8143 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); 8144 error_count++; 8145 } 8146 8147 return error_count; 8148 } 8149 8150 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) 8151 { 8152 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 8153 8154 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 8155 if (RREG32(razwi_happened_addr)) { 8156 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8157 GAUDI2_ENGINE_ID_PCIE, event_mask); 8158 WREG32(razwi_happened_addr, 0x1); 8159 } 8160 8161 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 8162 if (RREG32(razwi_happened_addr)) { 8163 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8164 GAUDI2_ENGINE_ID_PCIE, event_mask); 8165 WREG32(razwi_happened_addr, 0x1); 8166 } 8167 8168 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 8169 if (RREG32(razwi_happened_addr)) { 8170 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8171 GAUDI2_ENGINE_ID_PCIE, event_mask); 8172 WREG32(razwi_happened_addr, 0x1); 8173 } 8174 8175 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 8176 if (RREG32(razwi_happened_addr)) { 8177 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8178 GAUDI2_ENGINE_ID_PCIE, event_mask); 8179 WREG32(razwi_happened_addr, 0x1); 8180 } 8181 } 8182 8183 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type, 8184 u64 intr_cause_data, u64 *event_mask) 8185 { 8186 u32 error_count = 0; 8187 int i; 8188 8189 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 8190 if (!(intr_cause_data & BIT_ULL(i))) 8191 continue; 8192 8193 gaudi2_print_event(hdev, event_type, true, 8194 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); 8195 error_count++; 8196 8197 switch (intr_cause_data & BIT_ULL(i)) { 8198 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: 8199 break; 8200 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: 8201 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); 8202 break; 8203 } 8204 } 8205 8206 return error_count; 8207 } 8208 8209 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type, 8210 u64 intr_cause_data) 8211 8212 { 8213 u32 error_count = 0; 8214 int i; 8215 8216 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) { 8217 if (intr_cause_data & BIT_ULL(i)) { 8218 gaudi2_print_event(hdev, event_type, true, 8219 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]); 8220 error_count++; 8221 } 8222 } 8223 8224 return error_count; 8225 } 8226 8227 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 8228 { 8229 u32 error_count = 0; 8230 int i; 8231 8232 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8233 if 
(intr_cause_data & BIT_ULL(i)) { 8234 gaudi2_print_event(hdev, event_type, true, 8235 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8236 error_count++; 8237 } 8238 } 8239 8240 return error_count; 8241 } 8242 8243 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8244 u64 *event_mask) 8245 { 8246 u32 valid, val; 8247 u64 addr; 8248 8249 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8250 8251 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8252 return; 8253 8254 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8255 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8256 addr <<= 32; 8257 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8258 8259 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", 8260 is_pmmu ? "PMMU" : "HMMU", addr); 8261 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8262 8263 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); 8264 } 8265 8266 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8267 { 8268 u32 valid, val; 8269 u64 addr; 8270 8271 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8272 8273 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8274 return; 8275 8276 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8277 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8278 addr <<= 32; 8279 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8280 8281 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8282 is_pmmu ? 
"PMMU" : "HMMU", addr); 8283 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); 8284 } 8285 8286 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8287 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8288 { 8289 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8290 int i; 8291 8292 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8293 8294 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8295 if (spi_sei_cause & BIT(i)) { 8296 gaudi2_print_event(hdev, event_type, true, 8297 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8298 8299 if (i == 0) 8300 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8301 else if (i == 1) 8302 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8303 8304 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8305 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8306 8307 error_count++; 8308 } 8309 } 8310 8311 /* Clear cause */ 8312 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8313 8314 /* Clear interrupt */ 8315 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8316 8317 return error_count; 8318 } 8319 8320 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 8321 { 8322 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 8323 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 8324 int i; 8325 8326 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 8327 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 8328 8329 sei_cause_val = RREG32(sei_cause_addr); 8330 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 8331 cq_intr_val = RREG32(cq_intr_addr); 8332 8333 /* SEI interrupt */ 8334 if (sei_cause_cause) { 8335 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 8336 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 8337 sei_cause_val); 8338 8339 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 8340 if (!(sei_cause_cause & BIT(i))) 8341 continue; 8342 8343 gaudi2_print_event(hdev, event_type, true, 8344 "err cause: %s. %s: 0x%X\n", 8345 gaudi2_sm_sei_cause[i].cause_name, 8346 gaudi2_sm_sei_cause[i].log_name, 8347 sei_cause_log); 8348 error_count++; 8349 break; 8350 } 8351 8352 /* Clear SM_SEI_CAUSE */ 8353 WREG32(sei_cause_addr, 0); 8354 } 8355 8356 /* CQ interrupt */ 8357 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 8358 cq_intr_queue_index = 8359 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 8360 cq_intr_val); 8361 8362 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 8363 sm_index, cq_intr_queue_index); 8364 error_count++; 8365 8366 /* Clear CQ_INTR */ 8367 WREG32(cq_intr_addr, 0); 8368 } 8369 8370 return error_count; 8371 } 8372 8373 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8374 { 8375 bool is_pmmu = false; 8376 u32 error_count = 0; 8377 u64 mmu_base; 8378 u8 index; 8379 8380 switch (event_type) { 8381 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 8382 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; 8383 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8384 break; 8385 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 8386 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); 8387 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8388 break; 8389 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 8390 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; 8391 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8392 break; 8393 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 8394 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); 8395 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8396 break; 8397 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 8398 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; 8399 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8400 break; 8401 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 8402 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); 8403 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8404 break; 8405 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 8406 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; 8407 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8408 break; 8409 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 8410 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); 8411 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8412 break; 8413 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 8414 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8415 is_pmmu = true; 8416 mmu_base = mmPMMU_HBW_MMU_BASE; 8417 break; 8418 default: 8419 return 0; 8420 } 8421 8422 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 8423 is_pmmu, event_mask); 8424 8425 return error_count; 8426 } 8427 8428 8429 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 8430 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 8431 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 8432 { 8433 u32 addr, beat, beat_shift; 8434 bool rc = false; 8435 8436 dev_err_ratelimited(hdev->dev, 8437 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 8438 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 8439 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 8440 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 8441 8442 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 8443 dev_err_ratelimited(hdev->dev, 8444 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 8445 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 8446 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 8447 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 8448 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 8449 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 8450 8451 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 8452 for (beat = 0 ; beat < 4 ; beat++) { 8453 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8454 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 8455 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 8456 beat, 8457 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8458 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 8459 8460 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8461 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 8462 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n", 8463 beat, 8464 
le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8465 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 8466 rc |= true; 8467 } 8468 8469 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT; 8470 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8471 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) { 8472 dev_err_ratelimited(hdev->dev, 8473 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n", 8474 beat, 8475 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 8476 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 8477 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >> 8478 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift)); 8479 rc |= true; 8480 } 8481 8482 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat); 8483 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 8484 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2])); 8485 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 8486 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1])); 8487 } 8488 8489 return rc; 8490 } 8491 8492 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev, 8493 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt) 8494 { 8495 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds; 8496 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr; 8497 8498 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt); 8499 8500 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n", 8501 derr & 0x3, derr & 0xc); 8502 8503 /* JIRA H6-3286 - the following prints may not be valid */ 8504 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n"); 8505 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) { 8506 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr); 8507 dev_err_ratelimited(hdev->dev, 8508 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n", 8509 i, 8510 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr), 8511 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr), 8512 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr), 8513 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr)); 8514 } 8515 } 8516 8517 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev, 8518 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt) 8519 { 8520 __le32 *col_cmd = ca_par_err_data->dbg_col; 8521 __le16 *row_cmd = ca_par_err_data->dbg_row; 8522 u32 i; 8523 8524 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt); 8525 8526 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n"); 8527 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++) 8528 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i, 8529 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0), 8530 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0)); 8531 } 8532 8533 /* Returns true if hard reset is needed or false otherwise */ 8534 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, 8535 struct hl_eq_hbm_sei_data *sei_data) 8536 { 8537 bool require_hard_reset = false; 8538 u32 hbm_id, mc_id, cause_idx; 8539 8540 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4; 8541 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2; 8542 8543 cause_idx = sei_data->hdr.sei_cause; 8544 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) { 8545 gaudi2_print_event(hdev, event_type, true, 8546 "err cause: %s", 8547 "Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx); 8548 return true; 8549 } 8550 8551 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical, 8552 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). 
Error cause: %s\n", 8553 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 8554 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8555 hbm_mc_sei_cause[cause_idx]); 8556 8557 /* Print error-specific info */ 8558 switch (cause_idx) { 8559 case HBM_SEI_CATTRIP: 8560 require_hard_reset = true; 8561 break; 8562 8563 case HBM_SEI_CMD_PARITY_EVEN: 8564 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 8565 le32_to_cpu(sei_data->hdr.cnt)); 8566 require_hard_reset = true; 8567 break; 8568 8569 case HBM_SEI_CMD_PARITY_ODD: 8570 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 8571 le32_to_cpu(sei_data->hdr.cnt)); 8572 require_hard_reset = true; 8573 break; 8574 8575 case HBM_SEI_WRITE_DATA_PARITY_ERR: 8576 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 8577 le32_to_cpu(sei_data->hdr.cnt)); 8578 require_hard_reset = true; 8579 break; 8580 8581 case HBM_SEI_READ_ERR: 8582 /* Unlike other SEI events, read error requires further processing of the 8583 * raw data in order to determine the root cause. 8584 */ 8585 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 8586 &sei_data->read_err_info, 8587 le32_to_cpu(sei_data->hdr.cnt)); 8588 break; 8589 8590 default: 8591 break; 8592 } 8593 8594 require_hard_reset |= !!sei_data->hdr.is_critical; 8595 8596 return require_hard_reset; 8597 } 8598 8599 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 8600 u64 intr_cause_data) 8601 { 8602 if (intr_cause_data) { 8603 gaudi2_print_event(hdev, event_type, true, 8604 "temperature error cause: %#llx", intr_cause_data); 8605 return 1; 8606 } 8607 8608 return 0; 8609 } 8610 8611 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 8612 { 8613 u32 i, error_count = 0; 8614 8615 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 8616 if (intr_cause_data & hbm_mc_spi[i].mask) { 8617 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 8618 hbm_mc_spi[i].cause); 8619 error_count++; 8620 } 8621 8622 return error_count; 8623 } 8624 8625 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8626 { 8627 ktime_t zero_time = ktime_set(0, 0); 8628 8629 mutex_lock(&hdev->clk_throttling.lock); 8630 8631 switch (event_type) { 8632 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 8633 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 8634 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 8635 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 8636 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 8637 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 8638 break; 8639 8640 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 8641 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 8642 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 8643 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 8644 break; 8645 8646 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 8647 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 8648 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 8649 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 8650 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 8651 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8652 dev_info_ratelimited(hdev->dev, "Clock throttling due to 
overheating\n"); 8653 break; 8654 8655 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 8656 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 8657 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 8658 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8659 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 8660 break; 8661 8662 default: 8663 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 8664 break; 8665 } 8666 8667 mutex_unlock(&hdev->clk_throttling.lock); 8668 } 8669 8670 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 8671 struct cpucp_pkt_sync_err *sync_err) 8672 { 8673 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8674 8675 gaudi2_print_event(hdev, event_type, false, 8676 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8677 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 8678 q->pi, atomic_read(&q->ci)); 8679 } 8680 8681 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 8682 { 8683 u32 p2p_intr, msix_gw_intr, error_count = 0; 8684 8685 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 8686 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 8687 8688 if (p2p_intr) { 8689 gaudi2_print_event(hdev, event_type, true, 8690 "pcie p2p transaction terminated due to security, req_id(0x%x)\n", 8691 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 8692 8693 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 8694 error_count++; 8695 } 8696 8697 if (msix_gw_intr) { 8698 gaudi2_print_event(hdev, event_type, true, 8699 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n", 8700 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 8701 8702 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 8703 error_count++; 8704 } 8705 8706 return error_count; 8707 } 8708 8709 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 8710 struct hl_eq_pcie_drain_ind_data *drain_data) 8711 { 8712 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 8713 8714 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 8715 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 8716 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 8717 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 8718 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 8719 8720 if (cause & BIT_ULL(0)) { 8721 dev_err_ratelimited(hdev->dev, 8722 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 8723 !!lbw_rd, !!lbw_wr); 8724 error_count++; 8725 } 8726 8727 if (cause & BIT_ULL(1)) { 8728 dev_err_ratelimited(hdev->dev, 8729 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 8730 hbw_rd, hbw_wr); 8731 error_count++; 8732 } 8733 8734 return error_count; 8735 } 8736 8737 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 8738 { 8739 u32 error_count = 0; 8740 int i; 8741 8742 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 8743 if (intr_cause_data & BIT_ULL(i)) { 8744 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 8745 gaudi2_psoc_axi_drain_interrupts_cause[i]); 8746 error_count++; 8747 } 8748 } 8749 8750 return error_count; 8751 } 8752 8753 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 8754 struct cpucp_pkt_sync_err *sync_err) 8755 { 8756 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 8757 8758 gaudi2_print_event(hdev, event_type, false, 8759 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 8760 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, 
atomic_read(&q->ci)); 8761 } 8762 8763 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 8764 struct hl_eq_engine_arc_intr_data *data) 8765 { 8766 struct hl_engine_arc_dccm_queue_full_irq *q; 8767 u32 intr_type, engine_id; 8768 u64 payload; 8769 8770 intr_type = le32_to_cpu(data->intr_type); 8771 engine_id = le32_to_cpu(data->engine_id); 8772 payload = le64_to_cpu(data->payload); 8773 8774 switch (intr_type) { 8775 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 8776 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 8777 8778 gaudi2_print_event(hdev, event_type, true, 8779 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n", 8780 engine_id, intr_type, q->queue_index); 8781 return 1; 8782 default: 8783 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n"); 8784 return 0; 8785 } 8786 } 8787 8788 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 8789 { 8790 struct gaudi2_device *gaudi2 = hdev->asic_specific; 8791 bool reset_required = false, is_critical = false; 8792 u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0; 8793 u64 event_mask = 0; 8794 u16 event_type; 8795 8796 ctl = le32_to_cpu(eq_entry->hdr.ctl); 8797 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 8798 8799 if (event_type >= GAUDI2_EVENT_SIZE) { 8800 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 8801 event_type, GAUDI2_EVENT_SIZE - 1); 8802 return; 8803 } 8804 8805 gaudi2->events_stat[event_type]++; 8806 gaudi2->events_stat_aggregate[event_type]++; 8807 8808 switch (event_type) { 8809 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 8810 fallthrough; 8811 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 8812 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8813 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8814 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 8815 is_critical = eq_entry->ecc_data.is_critical; 8816 error_count++; 8817 break; 8818 8819 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 8820 fallthrough; 8821 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8822 fallthrough; 8823 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 8824 error_count = gaudi2_handle_qman_err(hdev, event_type); 8825 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8826 break; 8827 8828 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 8829 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8830 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 8831 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8832 break; 8833 8834 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 8835 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 8836 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8837 break; 8838 8839 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8840 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8841 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8842 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask); 8843 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8844 break; 8845 8846 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 8847 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 8848 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8849 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 8850 &eq_entry->razwi_with_intr_cause, &event_mask); 8851 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 8852 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8853 break; 8854 8855 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 8856 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 8857 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8858 &eq_entry->razwi_with_intr_cause, &event_mask); 8859 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 8860 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8861 break; 8862 8863 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 8864 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 8865 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 8866 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8867 break; 8868 8869 case GAUDI2_EVENT_TPC0_KERNEL_ERR: 8870 case GAUDI2_EVENT_TPC1_KERNEL_ERR: 8871 case GAUDI2_EVENT_TPC2_KERNEL_ERR: 8872 case GAUDI2_EVENT_TPC3_KERNEL_ERR: 8873 case GAUDI2_EVENT_TPC4_KERNEL_ERR: 8874 case GAUDI2_EVENT_TPC5_KERNEL_ERR: 8875 case GAUDI2_EVENT_TPC6_KERNEL_ERR: 8876 case GAUDI2_EVENT_TPC7_KERNEL_ERR: 8877 case GAUDI2_EVENT_TPC8_KERNEL_ERR: 8878 case GAUDI2_EVENT_TPC9_KERNEL_ERR: 8879 case GAUDI2_EVENT_TPC10_KERNEL_ERR: 8880 case GAUDI2_EVENT_TPC11_KERNEL_ERR: 8881 case GAUDI2_EVENT_TPC12_KERNEL_ERR: 8882 case GAUDI2_EVENT_TPC13_KERNEL_ERR: 8883 case GAUDI2_EVENT_TPC14_KERNEL_ERR: 8884 case GAUDI2_EVENT_TPC15_KERNEL_ERR: 8885 case GAUDI2_EVENT_TPC16_KERNEL_ERR: 8886 case GAUDI2_EVENT_TPC17_KERNEL_ERR: 8887 case GAUDI2_EVENT_TPC18_KERNEL_ERR: 8888 case GAUDI2_EVENT_TPC19_KERNEL_ERR: 8889 case GAUDI2_EVENT_TPC20_KERNEL_ERR: 8890 case GAUDI2_EVENT_TPC21_KERNEL_ERR: 8891 case GAUDI2_EVENT_TPC22_KERNEL_ERR: 8892 case GAUDI2_EVENT_TPC23_KERNEL_ERR: 8893 case GAUDI2_EVENT_TPC24_KERNEL_ERR: 8894 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 8895 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 8896 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 8897 &eq_entry->razwi_with_intr_cause, &event_mask); 8898 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8899 break; 8900 8901 case GAUDI2_EVENT_DEC0_SPI: 8902 case GAUDI2_EVENT_DEC1_SPI: 8903 case GAUDI2_EVENT_DEC2_SPI: 8904 case GAUDI2_EVENT_DEC3_SPI: 8905 case GAUDI2_EVENT_DEC4_SPI: 8906 case GAUDI2_EVENT_DEC5_SPI: 8907 case 
GAUDI2_EVENT_DEC6_SPI: 8908 case GAUDI2_EVENT_DEC7_SPI: 8909 case GAUDI2_EVENT_DEC8_SPI: 8910 case GAUDI2_EVENT_DEC9_SPI: 8911 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 8912 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 8913 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 8914 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8915 break; 8916 8917 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 8918 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 8919 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 8920 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 8921 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 8922 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 8923 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 8924 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 8925 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 8926 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8927 break; 8928 8929 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: 8930 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR: 8931 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR: 8932 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR: 8933 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / 8934 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 8935 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 8936 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 8937 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8938 break; 8939 8940 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: 8941 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: 8942 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: 8943 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: 8944 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / 8945 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 8946 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 8947 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask); 8948 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8949 break; 8950 8951 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 8952 case GAUDI2_EVENT_KDMA0_CORE: 8953 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 8954 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8955 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8956 break; 8957 8958 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: 8959 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 8960 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8961 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8962 break; 8963 8964 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 8965 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 8966 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 8967 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8968 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8969 break; 8970 8971 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 8972 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 8973 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 8974 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8975 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 8976 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8977 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8978 break; 8979 8980 case GAUDI2_EVENT_HIF0_FATAL ... 
GAUDI2_EVENT_HIF12_FATAL: 8981 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 8982 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8983 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8984 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8985 break; 8986 8987 case GAUDI2_EVENT_PMMU_FATAL_0: 8988 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 8989 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8990 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8991 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8992 break; 8993 8994 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 8995 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 8996 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8997 break; 8998 8999 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9000 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9001 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9002 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9003 reset_required = true; 9004 } 9005 error_count++; 9006 break; 9007 9008 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9009 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9010 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9011 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9012 break; 9013 9014 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: 9015 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9016 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9017 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9018 break; 9019 9020 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9021 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9022 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9023 break; 9024 9025 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9026 error_count = gaudi2_handle_psoc_drain(hdev, 9027 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9028 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9029 break; 9030 9031 case GAUDI2_EVENT_CPU_AXI_ECC: 9032 error_count = GAUDI2_NA_EVENT_CAUSE; 9033 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9034 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9035 break; 9036 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9037 error_count = GAUDI2_NA_EVENT_CAUSE; 9038 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9039 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9040 break; 9041 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9042 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9043 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9044 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9045 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9046 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9047 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9048 break; 9049 case GAUDI2_EVENT_VM0_ALARM_A ... 
GAUDI2_EVENT_VM3_ALARM_B: 9050 error_count = GAUDI2_NA_EVENT_CAUSE; 9051 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9052 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9053 break; 9054 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9055 error_count = GAUDI2_NA_EVENT_CAUSE; 9056 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9057 break; 9058 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9059 error_count = GAUDI2_NA_EVENT_CAUSE; 9060 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9061 break; 9062 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9063 error_count = GAUDI2_NA_EVENT_CAUSE; 9064 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9065 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9066 break; 9067 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9068 error_count = GAUDI2_NA_EVENT_CAUSE; 9069 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9070 break; 9071 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9072 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9073 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9074 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9075 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9076 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9077 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9078 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9079 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9080 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9081 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9082 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9083 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9084 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9085 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9086 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9087 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9088 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9089 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9090 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9091 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9092 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9093 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9094 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9095 case GAUDI2_EVENT_TPC24_BMON_SPMU: 9096 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9097 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9098 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9099 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9100 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9101 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9102 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9103 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9104 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9105 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9106 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9107 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9108 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9109 fallthrough; 9110 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9111 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9112 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9113 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9114 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9115 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9116 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9117 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9118 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9119 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9120 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... 
GAUDI2_EVENT_SM3_BMON_SPMU: 9121 error_count = GAUDI2_NA_EVENT_CAUSE; 9122 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9123 break; 9124 9125 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9126 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9127 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9128 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9129 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9130 error_count = GAUDI2_NA_EVENT_CAUSE; 9131 break; 9132 9133 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9134 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9135 error_count = GAUDI2_NA_EVENT_CAUSE; 9136 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9137 break; 9138 9139 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9140 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9141 error_count = GAUDI2_NA_EVENT_CAUSE; 9142 /* Do nothing- FW will handle it */ 9143 break; 9144 9145 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9146 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9147 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9148 break; 9149 9150 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9151 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9152 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9153 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9154 break; 9155 9156 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9157 error_count = GAUDI2_NA_EVENT_CAUSE; 9158 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9159 break; 9160 9161 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9162 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9163 le64_to_cpu(eq_entry->data[0])); 9164 error_count = GAUDI2_NA_EVENT_CAUSE; 9165 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9166 break; 9167 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9168 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9169 le64_to_cpu(eq_entry->data[0])); 9170 error_count = GAUDI2_NA_EVENT_CAUSE; 9171 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9172 break; 9173 9174 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9175 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9176 error_count = GAUDI2_NA_EVENT_CAUSE; 9177 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9178 break; 9179 9180 case GAUDI2_EVENT_ARC_DCCM_FULL: 9181 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9182 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9183 break; 9184 9185 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9186 case GAUDI2_EVENT_DEV_RESET_REQ: 9187 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9188 error_count = GAUDI2_NA_EVENT_CAUSE; 9189 is_critical = true; 9190 break; 9191 9192 default: 9193 if (gaudi2_irq_map_table[event_type].valid) { 9194 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9195 event_type); 9196 error_count = GAUDI2_NA_EVENT_CAUSE; 9197 } 9198 } 9199 9200 /* Make sure to dump an error in case no error cause was printed so far. 9201 * Note that although we have counted the errors, we use this number as 9202 * a boolean. 
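	 * (GAUDI2_NA_EVENT_CAUSE is used above as a marker for events that have no per-cause error counting.)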
	 */
	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
	else if (error_count == 0)
		gaudi2_print_event(hdev, event_type, true,
				"No error cause for H/W event %u\n", event_type);

	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
				(hdev->hard_reset_on_fw_events ||
				(hdev->asic_prop.fw_security_enabled && is_critical)))
		goto reset_device;

	/* Send unmask irq only for interrupts not classified as MSG */
	if (!gaudi2_irq_map_table[event_type].msg)
		hl_fw_unmask_irq(hdev, event_type);

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	if (hdev->asic_prop.fw_security_enabled && is_critical) {
		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
	} else {
		reset_flags |= HL_DRV_RESET_DELAY;
	}
	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	hl_device_cond_reset(hdev, reset_flags, event_mask);
}

static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
		struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
		u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
	u32 ctl, pkt_size;
	int rc = 0;

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	pkt_size = sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);

	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
&pkt_dma_addr, GFP_KERNEL); 9295 if (!lin_dma_pkts_arr) 9296 return -ENOMEM; 9297 9298 /* 9299 * set mmu bypass for the scrubbing - all ddmas are configured the same so save 9300 * only the first one to restore later 9301 * also set the sob addr for all edma cores for completion. 9302 * set QM as trusted to allow it to access physical address with MMU bp. 9303 */ 9304 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 9305 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9306 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9307 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9308 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9309 9310 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9311 continue; 9312 9313 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 9314 edma_offset, mmubp); 9315 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 9316 lower_32_bits(comp_addr)); 9317 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 9318 upper_32_bits(comp_addr)); 9319 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 9320 comp_val); 9321 gaudi2_qman_set_test_mode(hdev, 9322 edma_queues_id[dcore] + 4 * edma_idx, true); 9323 } 9324 } 9325 9326 WREG32(sob_addr, 0); 9327 9328 while (cur_addr < end_addr) { 9329 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9330 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9331 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9332 9333 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9334 continue; 9335 9336 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 9337 9338 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 9339 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 9340 pkt_dma_addr + dma_num * pkt_size, 9341 edma_queues_id[dcore] + edma_idx * 4, 9342 chunk_size, cur_addr, val); 9343 if (rc) 9344 goto end; 9345 9346 dma_num++; 9347 cur_addr += chunk_size; 9348 if (cur_addr == end_addr) 9349 break; 9350 } 9351 } 9352 } 9353 9354 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 9355 if (rc) { 9356 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 9357 goto end; 9358 } 9359 end: 9360 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9361 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9362 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9363 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9364 9365 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9366 continue; 9367 9368 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 9369 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 9370 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 9371 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 9372 gaudi2_qman_set_test_mode(hdev, 9373 edma_queues_id[dcore] + 4 * edma_idx, false); 9374 } 9375 } 9376 9377 WREG32(sob_addr, 0); 9378 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 9379 9380 return rc; 9381 } 9382 9383 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val) 9384 { 9385 int rc; 9386 struct asic_fixed_properties *prop = &hdev->asic_prop; 9387 u64 size = prop->dram_end_address - prop->dram_user_base_address; 9388 9389 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 9390 9391 if (rc) 9392 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 9393 
prop->dram_user_base_address, size); 9394 return rc; 9395 } 9396 9397 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 9398 { 9399 int rc; 9400 struct asic_fixed_properties *prop = &hdev->asic_prop; 9401 u64 val = hdev->memory_scrub_val; 9402 u64 addr, size; 9403 9404 if (!hdev->memory_scrub) 9405 return 0; 9406 9407 /* scrub SRAM */ 9408 addr = prop->sram_user_base_address; 9409 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 9410 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 9411 addr, addr + size, val); 9412 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 9413 if (rc) { 9414 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 9415 return rc; 9416 } 9417 9418 /* scrub DRAM */ 9419 rc = gaudi2_scrub_device_dram(hdev, val); 9420 if (rc) { 9421 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 9422 return rc; 9423 } 9424 return 0; 9425 } 9426 9427 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 9428 { 9429 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 9430 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 9431 u32 val, size, offset; 9432 int dcore_id; 9433 9434 offset = hdev->asic_prop.first_available_cq[0] * 4; 9435 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 9436 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 9437 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 9438 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 9439 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 9440 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 9441 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 9442 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 9443 9444 /* memset dcore0 CQ registers */ 9445 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9446 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9447 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9448 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9449 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9450 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9451 9452 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 9453 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 9454 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 9455 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 9456 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 9457 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 9458 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 9459 9460 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 9461 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 9462 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 9463 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 9464 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 9465 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 9466 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 9467 9468 cq_lbw_l_addr += DCORE_OFFSET; 9469 cq_lbw_h_addr += DCORE_OFFSET; 9470 cq_lbw_data_addr += DCORE_OFFSET; 9471 cq_base_l_addr += DCORE_OFFSET; 9472 cq_base_h_addr += DCORE_OFFSET; 9473 cq_size_addr += DCORE_OFFSET; 9474 } 9475 9476 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 9477 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 
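	/*
	 * Restore the user monitors below: each MON_STATUS gets only the PROT
	 * bit set and each MON_CONFIG is cleared, starting from the first
	 * monitor that is available to the user.
	 */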
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}

static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
						hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
						hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}

static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
						struct dup_block_ctx *cfg_ctx)
{
	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
	u8 seq;
	int i;

	for (i = 0 ; i < cfg_ctx->instances ; i++) {
		seq = block_idx * cfg_ctx->instances + i;

		/* skip disabled instance */
		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
			continue;

		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
					cfg_ctx->data);
	}
}

static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
						u64 mask)
{
	int i;

	cfg_ctx->enabled_mask = mask;

	for (i = 0 ; i < cfg_ctx->blocks ; i++)
		gaudi2_init_block_instances(hdev, i, cfg_ctx);
}

void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}

static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	/* Fetch the ctx */
	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false,
				MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
unreserve_va:
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}

static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int min_alloc_order, rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
					gaudi2_get_wait_cb_size(hdev)));

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev, "Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}

static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}

static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
			gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}

static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
			prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}

static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear the user registers if the device has just been
	 * reset; we restore only the NIC QM registers
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}

static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * First 64 SOB/MON are reserved for the driver for the QMAN auto
	 * completion mechanism. Each SOB/MON pair is used for a pending CS
	 * with the same cyclic index. The SOB value is increased when each
	 * of the CS jobs is completed. When the SOB reaches the number of
	 * CS jobs, the monitor generates an MSI-X interrupt.
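	 *
	 * For example (illustration only): a pending CS at cyclic index 5
	 * with 3 jobs uses SOB 5 and monitor 5, and the monitor fires once
	 * the SOB value reaches 3.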
	 */

	sob_id = mon_id = index;
	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
			(1 << CQ_ENTRY_READY_SHIFT) | index;

	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
				cs->jobs_cnt);

	return 0;
}

static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}

static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
		return 0;
	}

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}

static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}

static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/*
 * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
 *			      address to a DMMU page-size address (64MB) before
 *			      mapping it in the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * For a device with 6 HBMs the scramble is:
 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM   Reserved VA     scrambled VA for MMU mapping    Scrambled PA
 * Phys address                                                     in MMU last
 *                                                                  HOP
 * =============================================================================
 * PA1 0x3000000    VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000    VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}

static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}

static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 base = 0, dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		goto out;
	}

	if (core_id < 8) {
		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
		dec_id = core_id % NUM_OF_DEC_PER_DCORE;

		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;
	} else {
		/* PCIe Shared Decoder */
		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}
out:
	return base;
}

static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
			*block_id = i;
			if (block_size)
				*block_size = gaudi2->mapped_blocks[i].size;
			return 0;
		}
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}

static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size)
{
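	/*
	 * Map one complete user-mappable block into the caller's VMA: validate
	 * the block id and size, compute the block's offset inside the config
	 * BAR and remap the corresponding BAR pages into user space.
	 */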
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 offset_in_bar;
	u64 address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
			block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}

static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}

static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

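	/* Handle any page fault or access violation captured by this MMU */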
	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}

static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
						u32 page_size, u32 *real_page_size,
						bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * The MMU page size is different from the DRAM page size (more precisely, the DMMU
	 * page size is greater than the DRAM page size).
	 * For this reason work with the DRAM page size and let the MMU scrambling routine
	 * handle this mismatch when calculating the address to place in the MMU page table
	 * (in that case also make sure that the dram_page_size is not greater than the
	 * MMU page size).
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}