xref: /openbmc/linux/drivers/accel/habanalabs/gaudi2/gaudi2.c (revision b97d6790d03b763eca08847a9a5869a4291b9f9a)
1  // SPDX-License-Identifier: GPL-2.0
2  
3  /*
4   * Copyright 2020-2022 HabanaLabs, Ltd.
5   * All Rights Reserved.
6   */
7  
8  #include "gaudi2P.h"
9  #include "gaudi2_masks.h"
10  #include "../include/gaudi2/gaudi2_special_blocks.h"
11  #include "../include/hw_ip/mmu/mmu_general.h"
12  #include "../include/hw_ip/mmu/mmu_v2_0.h"
13  #include "../include/gaudi2/gaudi2_packets.h"
14  #include "../include/gaudi2/gaudi2_reg_map.h"
15  #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16  #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17  
18  #include <linux/module.h>
19  #include <linux/pci.h>
20  #include <linux/hwmon.h>
21  #include <linux/iommu.h>
22  
23  #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
24  
25  #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
26  
27  #define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
28  #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
29  #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
30  #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
31  #define GAUDI2_RESET_POLL_CNT			3
32  #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
33  #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
34  #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
35  #define GAUDI2_CB_POOL_CB_CNT			512
36  #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
37  #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
38  #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
39  #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
40  #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
41  
42  #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
43  
44  /*
45   * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
46   * and relies on that value (for array sizes etc.), we define a separate value for the
47   * maximum number of faulty TPCs which reflects the cluster binning requirements.
48   */
49  #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
50  #define MAX_FAULTY_XBARS			1
51  #define MAX_FAULTY_EDMAS			1
52  #define MAX_FAULTY_DECODERS			1
53  
54  #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
55  #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
56  #define GAUDI2_DECODER_FULL_MASK		0x3FF
57  
58  #define GAUDI2_NA_EVENT_CAUSE			0xFF
59  #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
60  #define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE	25
61  #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
62  #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
63  #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
64  #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
65  #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
66  #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
67  #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
68  #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
69  #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
70  #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
71  #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
72  #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
73  #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
74  #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
75  #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
76  #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
77  #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
78  #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
79  #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
80  
81  #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
82  #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
83  #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
84  
85  #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
86  #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
87  
88  #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
89  
90  #define IS_DMA_IDLE(dma_core_sts0)	\
91  	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
92  
93  #define IS_DMA_HALTED(dma_core_sts1)	\
94  	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
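/*
 * Note (descriptive comment derived from the masks used above): the two checks
 * read different status registers - a DMA core is considered idle when the
 * BUSY bit in CORE_STS0 is clear, and halted when the IS_HALT bit in CORE_STS1
 * is set.
 */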
95  
96  #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
97  
98  #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
99  
100  #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
101  	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
102  	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
103  	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
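/*
 * Note (descriptive comment derived from the macro above): a QMAN is treated
 * as idle only when all three status sources agree - the QMAN global status,
 * the ARC status and the CGM status each match their respective idle mask.
 */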
104  
105  #define PCIE_DEC_EN_MASK			0x300
106  #define DEC_WORK_STATE_IDLE			0
107  #define DEC_WORK_STATE_PEND			3
108  #define IS_DEC_IDLE(dec_swreg15) \
109  	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
110  	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
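/*
 * Note: a decoder is treated as idle when the SW_WORK_STATE field of SWREG15
 * reads either DEC_WORK_STATE_IDLE (0) or DEC_WORK_STATE_PEND (3).
 */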
111  
112  /* HBM MMU address scrambling parameters */
113  #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
114  #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
115  #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
116  #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
117  #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
118  #define MMU_RANGE_INV_VA_LSB_SHIFT		12
119  #define MMU_RANGE_INV_VA_MSB_SHIFT		44
120  #define MMU_RANGE_INV_EN_SHIFT			0
121  #define MMU_RANGE_INV_ASID_EN_SHIFT		1
122  #define MMU_RANGE_INV_ASID_SHIFT		2
123  
124  /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
125   * because it has only a 2-entry FIFO, and hence it is not enabled for it.
126   */
127  #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
128  #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
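/*
 * Illustrative expansion, assuming GENMASK(h, l) semantics and
 * GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19:
 *   GAUDI2_PMMU_SPI_SEI_ENABLE_MASK == GENMASK(17, 0) == 0x3FFFF (last cause disabled)
 *   GAUDI2_HMMU_SPI_SEI_ENABLE_MASK == GENMASK(18, 0) == 0x7FFFF (all causes enabled)
 */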
129  
130  #define GAUDI2_MAX_STRING_LEN			64
131  
132  #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
133  							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
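/*
 * Note: this counts the MSI-X entries spanning the whole decoder interrupt
 * range, from the first DCORE0 decoder "normal" interrupt up to and including
 * the last shared-decoder "abnormal" interrupt (hence the +1).
 */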
134  
135  #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
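/*
 * Note: the stride between consecutive DCOREs' engine-ID blocks, derived from
 * the distance between the first EDMA engine IDs of DCORE1 and DCORE0.
 */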
136  
137  /* RAZWI initiator coordinates */
138  #define RAZWI_GET_AXUSER_XY(x) \
139  	((x & 0xF8001FF0) >> 4)
140  
141  #define RAZWI_GET_AXUSER_LOW_XY(x) \
142  	((x & 0x00001FF0) >> 4)
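/*
 * Note (descriptive comment derived from the masks and shifts above): both
 * getters mask the AXUSER register value and shift it right by 4, so the
 * result is directly comparable to the packed IDs built by the
 * RAZWI_INITIATOR_ID_* macros below; the "LOW" variant keeps only the low
 * X/Y coordinate bits.
 */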
143  
144  #define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
145  #define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
146  #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
147  #define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF
148  
149  #define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
150  #define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F
151  
152  #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
153  	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
154  		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
155  
156  #define RAZWI_INITIATOR_ID_X_HIGH(x) \
157  		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
158  
159  #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
160  	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
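/*
 * Illustrative expansion (worked example, not an additional definition):
 *   RAZWI_INITIATOR_ID_X_Y(2, 4, 0) ==
 *	((4 & 0xF) << 5) | ((2 & 0x1F) << 0) | ((0 & 0x1F) << 23) == 0x82,
 * which is the packed coordinate used below for DEC0.
 */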
161  
162  #define PSOC_RAZWI_ENG_STR_SIZE 128
163  #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
164  
165  /* HW scrambles only bits 0-25 */
166  #define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
167  
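/*
 * struct gaudi2_razwi_info - RAZWI initiator descriptor (descriptive comment;
 * field meanings inferred from the tables below):
 * @axuser_xy: packed X/Y initiator coordinate, as built by RAZWI_INITIATOR_ID_X_Y()
 * @rtr_ctrl: base address of the RTR control block the initiator sits behind
 * @eng_id: engine ID to report for the event (GAUDI2_ENGINE_ID_SIZE when N/A)
 * @eng_name: printable engine name
 */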
168  struct gaudi2_razwi_info {
169  	u32 axuser_xy;
170  	u32 rtr_ctrl;
171  	u16 eng_id;
172  	char *eng_name;
173  };
174  
175  static struct gaudi2_razwi_info common_razwi_info[] = {
176  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
177  				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
178  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
179  				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
180  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
181  				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
182  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
183  				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
184  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
185  				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
186  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
187  				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
188  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
189  				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
190  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
191  				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
192  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
193  				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
194  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
195  				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
196  		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
197  				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
198  		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
199  				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
200  		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
201  				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
202  		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
203  				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
204  		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
205  				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
206  		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
207  				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
208  		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
209  				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
210  		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
211  				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
212  		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
213  				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
214  		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
215  				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
216  		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
217  				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
218  		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
219  				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
220  		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
221  				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
222  		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
223  				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
224  		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
225  				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
226  		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
227  				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
228  		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
229  				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
230  		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
231  				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
232  		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
233  				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
234  		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
235  				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
236  		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
237  				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
238  		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
239  				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
240  		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
241  				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
242  		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
243  				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
244  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
245  				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
246  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
247  				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
248  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
249  				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
250  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
251  				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
252  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
253  				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
254  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
255  				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
256  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
257  				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
258  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
259  				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
260  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
261  				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
262  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
263  				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
264  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
265  				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
266  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
267  				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
268  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
269  				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
270  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
271  				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
272  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
273  				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
274  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
275  				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
276  		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
277  				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
278  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
279  				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
280  		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
281  				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
282  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
283  				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
284  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
285  				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
286  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
287  				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
288  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
289  				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
290  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
291  				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
292  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
293  				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
294  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
295  				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
296  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
297  				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
298  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
299  				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
300  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
301  				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
302  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
303  				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
304  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
305  				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
306  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
307  				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
308  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
309  				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
310  		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
311  				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
312  		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
313  				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
314  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
315  				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
316  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
317  				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
318  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
319  				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
320  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
321  				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
322  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
323  				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
324  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
325  				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
326  		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
327  				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
328  		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
329  				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
330  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
331  				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
332  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
333  				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
334  		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
335  				GAUDI2_ENGINE_ID_PSOC, "CPU"},
336  		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
337  				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
338  };
339  
340  static struct gaudi2_razwi_info mme_razwi_info[] = {
341  		/* The MME X high coordinate is N/A, hence only the low coordinates are used */
342  		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
343  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
344  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
345  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
346  		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
347  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
348  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
349  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
350  		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
351  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
352  		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
353  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
354  		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
355  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
356  		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
357  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
358  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
359  				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
360  		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
361  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
362  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
363  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
364  		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
365  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
366  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
367  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
368  		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
369  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
370  		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
371  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
372  		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
373  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
374  		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
375  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
376  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
377  				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
378  		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
379  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
380  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
381  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
382  		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
383  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
384  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
385  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
386  		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
387  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
388  		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
389  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
390  		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
391  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
392  		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
393  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
394  		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
395  				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
396  		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
397  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
398  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
399  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
400  		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
401  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
402  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
403  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
404  		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
405  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
406  		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
407  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
408  		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
409  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
410  		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
411  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
412  		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
413  				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
414  };
415  
416  enum hl_pmmu_fatal_cause {
417  	LATENCY_RD_OUT_FIFO_OVERRUN,
418  	LATENCY_WR_OUT_FIFO_OVERRUN,
419  };
420  
421  enum hl_pcie_drain_ind_cause {
422  	LBW_AXI_DRAIN_IND,
423  	HBW_AXI_DRAIN_IND
424  };
425  
426  static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
427  	[HBM_ID0] = 0xFFFC,
428  	[HBM_ID1] = 0xFFCF,
429  	[HBM_ID2] = 0xF7F7,
430  	[HBM_ID3] = 0x7F7F,
431  	[HBM_ID4] = 0xFCFF,
432  	[HBM_ID5] = 0xCFFF,
433  };
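/*
 * Note (observation derived from the mask values above): each per-HBM mask has
 * 14 of the 16 HIF bits set (cf. GAUDI2_HIF_HMMU_FULL_MASK), i.e. two HIF/HMMU
 * units are excluded per HBM cluster.
 */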
434  
435  static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
436  	[0] = HBM_ID0,
437  	[1] = HBM_ID1,
438  	[2] = HBM_ID4,
439  	[3] = HBM_ID5,
440  };
441  
442  static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
443  	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
444  	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
445  	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
446  	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
447  	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
448  	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
449  	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
450  	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
451  };
452  
453  static const int gaudi2_qman_async_event_id[] = {
454  	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
455  	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
456  	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
457  	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
458  	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
459  	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
460  	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
461  	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
462  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
463  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
464  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
465  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
466  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
467  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
468  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
469  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
470  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
471  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
472  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
473  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
474  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
475  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
476  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
477  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
478  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
479  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
480  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
481  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
482  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
483  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
484  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
485  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
486  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
487  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
488  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
489  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
490  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
491  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
492  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
493  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
494  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
495  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
496  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
497  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
498  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
499  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
500  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
501  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
502  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
503  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
504  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
505  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
506  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
507  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
508  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
509  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
510  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
511  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
512  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
513  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
514  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
515  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
516  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
517  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
518  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
519  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
520  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
521  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
522  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
523  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
524  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
525  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
526  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
527  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
528  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
529  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
530  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
531  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
532  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
533  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
534  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
535  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
536  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
537  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
538  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
539  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
540  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
541  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
542  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
543  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
544  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
545  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
546  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
547  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
548  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
549  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
550  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
551  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
552  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
553  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
554  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
555  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
556  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
557  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
558  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
559  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
560  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
561  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
562  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
563  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
564  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
565  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
566  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
567  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
568  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
569  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
570  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
571  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
572  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
573  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
574  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
575  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
576  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
577  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
578  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
579  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
580  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
581  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
582  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
583  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
584  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
585  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
586  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
587  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
588  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
589  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
590  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
591  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
592  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
593  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
594  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
595  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
596  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
597  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
598  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
599  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
600  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
601  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
602  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
603  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
604  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
605  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
606  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
607  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
608  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
609  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
610  	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
611  	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
612  	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
613  	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
614  	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
615  	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
616  	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
617  	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
618  	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
619  	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
620  	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
621  	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
622  	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
623  	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
624  	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
625  	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
626  	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
627  	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
628  	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
629  	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
630  	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
631  	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
632  	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
633  	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
634  	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
635  	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
636  	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
637  	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
638  	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
639  	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
640  	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
641  	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
642  	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
643  	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
644  	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
645  	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
646  	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
647  	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
648  	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
649  	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
650  	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
651  	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
652  	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
653  	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
654  	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
655  	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
656  	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
657  	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
658  	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
659  	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
660  	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
661  	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
662  	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
663  	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
664  	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
665  	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
666  	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
667  	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
668  	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
669  	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
670  	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
671  	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
672  	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
673  	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
674  	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
675  	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
676  	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
677  	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
678  	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
679  	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
680  	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
681  	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
682  	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
683  	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
684  	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
685  	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
686  	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
687  	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
688  	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
689  	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
690  	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
691  	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
692  	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
693  	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
694  	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
695  	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
696  	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
697  	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
698  	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
699  	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
700  	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
701  	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
702  	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
703  	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
704  	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
705  	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
706  	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707  	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708  	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709  	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
710  	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
711  	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712  	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
713  	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
714  };
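/*
 * Note: all four queues of a given QMAN map to the same async event ID, so the
 * table above repeats each event once per queue.
 */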
715  
716  static const int gaudi2_dma_core_async_event_id[] = {
717  	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
718  	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
719  	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
720  	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
721  	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
722  	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
723  	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
724  	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
725  	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
726  	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
727  	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
728  };
729  
730  static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
731  	"qman sei intr",
732  	"arc sei intr"
733  };
734  
735  static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
736  	"AXI_TERMINATOR WR",
737  	"AXI_TERMINATOR RD",
738  	"AXI SPLIT SEI Status"
739  };
740  
741  static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
742  	"cbu_bresp_sei_intr_cause",
743  	"cbu_rresp_sei_intr_cause",
744  	"lbu_bresp_sei_intr_cause",
745  	"lbu_rresp_sei_intr_cause",
746  	"cbu_axi_split_intr_cause",
747  	"lbu_axi_split_intr_cause",
748  	"arc_ip_excptn_sei_intr_cause",
749  	"dmi_bresp_sei_intr_cause",
750  	"aux2apb_err_sei_intr_cause",
751  	"cfg_lbw_wr_terminated_intr_cause",
752  	"cfg_lbw_rd_terminated_intr_cause",
753  	"cfg_dccm_wr_terminated_intr_cause",
754  	"cfg_dccm_rd_terminated_intr_cause",
755  	"cfg_hbw_rd_terminated_intr_cause"
756  };
757  
758  static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
759  	"msix_vcd_hbw_sei",
760  	"msix_l2c_hbw_sei",
761  	"msix_nrm_hbw_sei",
762  	"msix_abnrm_hbw_sei",
763  	"msix_vcd_lbw_sei",
764  	"msix_l2c_lbw_sei",
765  	"msix_nrm_lbw_sei",
766  	"msix_abnrm_lbw_sei",
767  	"apb_vcd_lbw_sei",
768  	"apb_l2c_lbw_sei",
769  	"apb_nrm_lbw_sei",
770  	"apb_abnrm_lbw_sei",
771  	"dec_sei",
772  	"dec_apb_sei",
773  	"trc_apb_sei",
774  	"lbw_mstr_if_sei",
775  	"axi_split_bresp_err_sei",
776  	"hbw_axi_wr_viol_sei",
777  	"hbw_axi_rd_viol_sei",
778  	"lbw_axi_wr_viol_sei",
779  	"lbw_axi_rd_viol_sei",
780  	"vcd_spi",
781  	"l2c_spi",
782  	"nrm_spi",
783  	"abnrm_spi",
784  };
785  
786  static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
787  	"PQ AXI HBW error",
788  	"CQ AXI HBW error",
789  	"CP AXI HBW error",
790  	"CP error due to undefined OPCODE",
791  	"CP encountered STOP OPCODE",
792  	"CP AXI LBW error",
793  	"CP WRREG32 or WRBULK returned error",
794  	"N/A",
795  	"FENCE 0 inc over max value and clipped",
796  	"FENCE 1 inc over max value and clipped",
797  	"FENCE 2 inc over max value and clipped",
798  	"FENCE 3 inc over max value and clipped",
799  	"FENCE 0 dec under min value and clipped",
800  	"FENCE 1 dec under min value and clipped",
801  	"FENCE 2 dec under min value and clipped",
802  	"FENCE 3 dec under min value and clipped",
803  	"CPDMA Up overflow",
804  	"PQC L2H error"
805  };
806  
807  static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
808  	"RSVD0",
809  	"CQ AXI HBW error",
810  	"CP AXI HBW error",
811  	"CP error due to undefined OPCODE",
812  	"CP encountered STOP OPCODE",
813  	"CP AXI LBW error",
814  	"CP WRREG32 or WRBULK returned error",
815  	"N/A",
816  	"FENCE 0 inc over max value and clipped",
817  	"FENCE 1 inc over max value and clipped",
818  	"FENCE 2 inc over max value and clipped",
819  	"FENCE 3 inc over max value and clipped",
820  	"FENCE 0 dec under min value and clipped",
821  	"FENCE 1 dec under min value and clipped",
822  	"FENCE 2 dec under min value and clipped",
823  	"FENCE 3 dec under min value and clipped",
824  	"CPDMA Up overflow",
825  	"RSVD17",
826  	"CQ_WR_IFIFO_CI_ERR",
827  	"CQ_WR_CTL_CI_ERR",
828  	"ARC_CQF_RD_ERR",
829  	"ARC_CQ_WR_IFIFO_CI_ERR",
830  	"ARC_CQ_WR_CTL_CI_ERR",
831  	"ARC_AXI_ERR",
832  	"CP_SWITCH_WDT_ERR"
833  };
834  
835  static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
836  	"Choice push while full error",
837  	"Choice Q watchdog error",
838  	"MSG AXI LBW returned with error"
839  };
840  
841  static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
842  	"qm_axi_err",
843  	"qm_trace_fence_events",
844  	"qm_sw_err",
845  	"qm_cp_sw_stop",
846  	"lbw_mstr_rresp_err",
847  	"lbw_mstr_bresp_err",
848  	"lbw_msg_slverr",
849  	"hbw_msg_slverr",
850  	"wbc_slverr",
851  	"hbw_mstr_rresp_err",
852  	"hbw_mstr_bresp_err",
853  	"sb_resp_intr",
854  	"mrsb_resp_intr",
855  	"core_dw_status_0",
856  	"core_dw_status_1",
857  	"core_dw_status_2",
858  	"core_dw_status_3",
859  	"core_dw_status_4",
860  	"core_dw_status_5",
861  	"core_dw_status_6",
862  	"core_dw_status_7",
863  	"async_arc2cpu_sei_intr",
864  };
865  
866  static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
867  	"tpc_address_exceed_slm",
868  	"tpc_div_by_0",
869  	"tpc_spu_mac_overflow",
870  	"tpc_spu_addsub_overflow",
871  	"tpc_spu_abs_overflow",
872  	"tpc_spu_fma_fp_dst_nan",
873  	"tpc_spu_fma_fp_dst_inf",
874  	"tpc_spu_convert_fp_dst_nan",
875  	"tpc_spu_convert_fp_dst_inf",
876  	"tpc_spu_fp_dst_denorm",
877  	"tpc_vpu_mac_overflow",
878  	"tpc_vpu_addsub_overflow",
879  	"tpc_vpu_abs_overflow",
880  	"tpc_vpu_convert_fp_dst_nan",
881  	"tpc_vpu_convert_fp_dst_inf",
882  	"tpc_vpu_fma_fp_dst_nan",
883  	"tpc_vpu_fma_fp_dst_inf",
884  	"tpc_vpu_fp_dst_denorm",
885  	"tpc_assertions",
886  	"tpc_illegal_instruction",
887  	"tpc_pc_wrap_around",
888  	"tpc_qm_sw_err",
889  	"tpc_hbw_rresp_err",
890  	"tpc_hbw_bresp_err",
891  	"tpc_lbw_rresp_err",
892  	"tpc_lbw_bresp_err",
893  	"st_unlock_already_locked",
894  	"invalid_lock_access",
895  	"LD_L protection violation",
896  	"ST_L protection violation",
897  	"D$ L0CS mismatch",
898  };
899  
900  static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
901  	"agu_resp_intr",
902  	"qman_axi_err",
903  	"wap sei (wbc axi err)",
904  	"arc sei",
905  	"cfg access error",
906  	"qm_sw_err",
907  	"sbte_dbg_intr_0",
908  	"sbte_dbg_intr_1",
909  	"sbte_dbg_intr_2",
910  	"sbte_dbg_intr_3",
911  	"sbte_dbg_intr_4",
912  	"sbte_prtn_intr_0",
913  	"sbte_prtn_intr_1",
914  	"sbte_prtn_intr_2",
915  	"sbte_prtn_intr_3",
916  	"sbte_prtn_intr_4",
917  };
918  
919  static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
920  	"i0",
921  	"i1",
922  	"i2",
923  	"i3",
924  	"i4",
925  };
926  
927  static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
928  	"WBC ERR RESP_0",
929  	"WBC ERR RESP_1",
930  	"AP SOURCE POS INF",
931  	"AP SOURCE NEG INF",
932  	"AP SOURCE NAN",
933  	"AP RESULT POS INF",
934  	"AP RESULT NEG INF",
935  };
936  
937  static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
938  	"HBW Read returned with error RRESP",
939  	"HBW write returned with error BRESP",
940  	"LBW write returned with error BRESP",
941  	"descriptor_fifo_overflow",
942  	"KDMA SB LBW Read returned with error",
943  	"KDMA WBC LBW Write returned with error",
944  	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
945  	"WRONG CFG FOR COMMIT IN LIN DMA"
946  };
947  
948  static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
949  	"HBW/LBW Read returned with error RRESP",
950  	"HBW/LBW write returned with error BRESP",
951  	"LBW write returned with error BRESP",
952  	"descriptor_fifo_overflow",
953  	"KDMA SB LBW Read returned with error",
954  	"KDMA WBC LBW Write returned with error",
955  	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
956  	"WRONG CFG FOR COMMIT IN LIN DMA"
957  };
958  
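/*
 * struct gaudi2_sm_sei_cause_data - SM SEI cause description (descriptive
 * comment; field meanings inferred from the table below):
 * @cause_name: human readable cause of the sync-manager SEI interrupt
 * @log_name: name of the value logged alongside it (e.g. "SOB ID", "AXI id")
 */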
959  struct gaudi2_sm_sei_cause_data {
960  	const char *cause_name;
961  	const char *log_name;
962  };
963  
964  static const struct gaudi2_sm_sei_cause_data
965  gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
966  	{"calculated SO value overflow/underflow", "SOB ID"},
967  	{"payload address of monitor is not aligned to 4B", "monitor addr"},
968  	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
969  };
970  
971  static const char * const
972  gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
973  	"LATENCY_RD_OUT_FIFO_OVERRUN",
974  	"LATENCY_WR_OUT_FIFO_OVERRUN",
975  };
976  
977  static const char * const
978  gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
979  	"LATENCY_RD_OUT_FIFO_OVERRUN",
980  	"LATENCY_WR_OUT_FIFO_OVERRUN",
981  };
982  
983  static const char * const
984  gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
985  	"AXI drain HBW",
986  	"AXI drain LBW",
987  };
988  
989  static const char * const
990  gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
991  	"HBW error response",
992  	"LBW error response",
993  	"TLP is blocked by RR"
994  };
995  
996  const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
997  	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
998  	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
999  	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1000  	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1001  	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1002  	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1003  	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1004  	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1005  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1006  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1007  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1008  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1009  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1010  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1011  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1012  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1013  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1014  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1015  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1016  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1017  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1018  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1019  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1020  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1021  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1022  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1023  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1024  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1025  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1026  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1027  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1028  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1029  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1030  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1031  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1032  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1033  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1034  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1035  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1036  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1037  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1038  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1039  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1040  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1041  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1042  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1043  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1044  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1045  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1046  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1047  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1048  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1049  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1050  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1051  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1052  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1053  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1054  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1055  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1056  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1057  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1058  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1059  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1060  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1061  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1062  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1063  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1064  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1065  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1066  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1067  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1068  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1069  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1070  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1071  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1072  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1073  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1074  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1075  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1076  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1077  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1078  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1079  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1080  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1081  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1082  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1083  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1084  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1085  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1086  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1087  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1088  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1089  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1090  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1091  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1092  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1093  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1094  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1095  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1096  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1097  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1098  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1099  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1100  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1101  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1102  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1103  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1104  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1105  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1106  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1107  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1108  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1109  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1110  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1111  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1112  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1113  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1114  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1115  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1116  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1117  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1118  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1119  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1120  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1121  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1122  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1123  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1124  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1125  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1126  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1127  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1128  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1129  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1130  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1131  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1132  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1133  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1134  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1135  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1136  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1137  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1138  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1139  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1140  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1141  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1142  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1143  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1144  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1145  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1146  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1147  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1148  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1149  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1150  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1151  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1152  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1153  	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1154  	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1155  	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1156  	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1157  	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1158  	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1159  	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1160  	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1161  	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1162  	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1163  	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1164  	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1165  	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1166  	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1167  	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1168  	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1169  	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1170  	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1171  	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1172  	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1173  	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1174  	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1175  	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1176  	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1177  	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1178  	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1179  	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1180  	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1181  	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1182  	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1183  	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1184  	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1185  	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1186  	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1187  	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1188  	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1189  	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1190  	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1191  	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1192  	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1193  	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1194  	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1195  	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1196  	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1197  	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1198  	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1199  	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1200  	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1201  	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1202  	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1203  	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1204  	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1205  	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1206  	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1207  	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1208  	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1209  	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1210  	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1211  	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1212  	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1213  	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1214  	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1215  	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1216  	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1217  	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1218  	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1219  	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1220  	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1221  	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1222  	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1223  	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1224  	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1225  	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1226  	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1227  	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1228  	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1229  	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1230  	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1231  	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1232  	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1233  	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1234  	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1235  	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1236  	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1237  	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1238  	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1239  	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1240  	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1241  	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1242  	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1243  	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1244  	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1245  	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1246  	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1247  	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1248  	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1249  	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1250  	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1251  	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1252  	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1253  	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1254  	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1255  	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1256  	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1257  };
1258  
1259  static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1260  	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1261  	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1262  	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1263  	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1264  	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1265  	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1266  	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1267  	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1268  	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1269  	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1270  	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1271  	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1272  	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1273  	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1274  	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1275  	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1276  	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1277  	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1278  	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1279  	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1280  	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1281  	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1282  	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1283  	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1284  	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1285  	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1286  	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1287  	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1288  	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1289  	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1290  	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1291  	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1292  	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1293  	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1294  	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1295  	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1296  	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1297  	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1298  	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1299  	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1300  	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1301  	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1302  	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1303  	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1304  	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1305  	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1306  	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1307  	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1308  	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1309  	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1310  	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1311  	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1312  	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1313  	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1314  	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1315  	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1316  	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1317  	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1318  	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1319  	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1320  	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1321  	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1322  	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1323  	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1324  	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1325  	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1326  	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1327  	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1328  	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1329  };
1330  
1331  static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1332  	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1333  	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1334  	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1335  	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1336  	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1337  	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1338  	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1339  	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1340  	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1341  	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1342  	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1343  	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1344  	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1345  	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1346  	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1347  	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1348  	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1349  	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1350  	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1351  	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1352  	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1353  	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1354  	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1355  	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1356  	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1357  	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1358  	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1359  	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1360  	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1361  	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1362  	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1363  	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1364  	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1365  	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1366  	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1367  	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1368  	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1369  	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1370  	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1371  	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1372  	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1373  	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1374  	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1375  	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1376  	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1377  	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1378  	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1379  	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1380  	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1381  	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1382  	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1383  	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1384  	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1385  	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1386  	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1387  	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1388  	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1389  	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1390  	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1391  	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1392  	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1393  	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1394  	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1395  	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1396  	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1397  	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1398  	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1399  	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1400  	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1401  };
1402  
1403  const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1404  	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1405  	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1406  	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1407  	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1408  };
1409  
1410  static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1411  	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1412  	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1413  	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1414  	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1415  	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1416  	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1417  	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1418  	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1419  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1420  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1421  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1422  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1423  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1424  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1425  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1426  	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1427  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1428  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1429  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1430  	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1431  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1432  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1433  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1434  	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1435  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1436  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1437  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1438  	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1439  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1440  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1441  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1442  	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1443  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1444  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1445  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1446  	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1447  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1448  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1449  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1450  	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1451  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1452  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1453  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1454  	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1455  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1456  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1457  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1458  	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1459  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1460  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1461  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1462  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1463  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1464  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1465  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1466  	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1467  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1468  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1469  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1470  	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1471  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1472  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1473  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1474  	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1475  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1476  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1477  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1478  	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1479  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1480  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1481  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1482  	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1483  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1484  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1485  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1486  	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1487  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1488  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1489  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1490  	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1491  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1492  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1493  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1494  	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1495  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1496  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1497  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1498  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1499  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1500  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1501  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1502  	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1503  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1504  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1505  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1506  	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1507  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1508  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1509  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1510  	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1511  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1512  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1513  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1514  	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1515  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1516  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1517  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1518  	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1519  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1520  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1521  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1522  	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1523  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1524  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1525  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1526  	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1527  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1528  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1529  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1530  	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1531  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1532  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1533  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1534  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1535  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1536  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1537  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1538  	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1539  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1540  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1541  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1542  	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1543  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1544  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1545  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1546  	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1547  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1548  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1549  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1550  	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1551  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1552  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1553  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1554  	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1555  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1556  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1557  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1558  	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1559  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1560  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1561  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1562  	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1563  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1564  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1565  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1566  	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1567  	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1568  	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1569  	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1570  	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1571  	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1572  	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1573  	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1574  	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1575  	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1576  	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1577  	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1578  	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1579  	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1580  	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1581  	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1582  	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1583  	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1584  	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1585  	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1586  	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1587  	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1588  	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1589  	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1590  	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1591  	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1592  	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1593  	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1594  	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1595  	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1596  	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1597  	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1598  	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1599  	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1600  	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1601  	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1602  	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1603  	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1604  	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1605  	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1606  	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1607  	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1608  	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1609  	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1610  	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1611  	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1612  	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1613  	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1614  	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1615  	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1616  	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1617  	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1618  	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1619  	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1620  	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1621  	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1622  	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1623  	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1624  	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1625  	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1626  	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1627  	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1628  	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1629  	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1630  	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1631  	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1632  	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1633  	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1634  	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1635  	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1636  	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1637  	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1638  	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1639  	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1640  	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1641  	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1642  	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1643  	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1644  	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1645  	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1646  	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1647  	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1648  	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1649  	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1650  	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1651  	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1652  	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1653  	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1654  	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1655  	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1656  	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1657  	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1658  	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1659  	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1660  	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1661  	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1662  	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1663  	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1664  	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1665  	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1666  	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1667  	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1668  	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1669  	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1670  	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1671  };
1672  
1673  const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1674  	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1675  	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1676  	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1677  	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1678  	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1679  	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1680  	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1681  	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1682  	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1683  	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1684  	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1685  };
1686  
1687  const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1688  	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1689  	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1690  	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1691  	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1692  };
1693  
1694  static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1695  	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1696  	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1697  	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1698  	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1699  	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1700  	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1701  	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1702  	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1703  	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1704  	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1705  	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1706  	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1707  	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1708  	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1709  	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1710  	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1711  	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1712  	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1713  	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1714  	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1715  	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1716  	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1717  	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1718  	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1719  	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1720  };
1721  
1722  static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1723  	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1724  	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1725  	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1726  	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1727  	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1728  	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1729  	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1730  	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1731  	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1732  	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1733  	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1734  	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1735  	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1736  	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1737  	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1738  	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1739  	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1740  	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1741  	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1742  	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1743  	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1744  	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1745  	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1746  	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1747  	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1748  };
1749  
1750  const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1751  	[ROTATOR_ID_0] = mmROT0_BASE,
1752  	[ROTATOR_ID_1] = mmROT1_BASE
1753  };
1754  
1755  static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1756  	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1757  	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1758  	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1759  	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1760  	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1761  	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1762  	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1763  	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1764  	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1765  	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1766  	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1767  	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1768  	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1769  	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1770  	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1771  	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1772  	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1773  	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1774  	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1775  	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1776  	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1777  	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1778  	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1779  	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1780  	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1781  };
1782  
1783  static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1784  	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1785  	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1786  };
1787  
1788  static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1789  	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1790  	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1791  	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1792  	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1793  	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1794  	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1795  	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1796  	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1797  	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1798  	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1799  	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1800  	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1801  	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1802  	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1803  	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1804  	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1805  	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1806  	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1807  	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1808  	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1809  	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1810  	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1811  	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1812  	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1813  	/* the PCI TPC is placed last (mapped like HW) */
1814  	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1815  };
1816  
1817  static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1818  	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1819  	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1820  	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1821  	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1822  };
1823  
1824  static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1825  	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1826  	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1827  	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1828  	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1829  	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1830  	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1831  	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1832  	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1833  	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1834  	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1835  	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1836  };
1837  
1838  const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1839  	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1840  	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1841  	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1842  	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1843  	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1844  	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1845  	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1846  	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1847  };
1848  
1849  static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1850  	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1851  	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1852  	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1853  	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1854  	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1855  	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1856  	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1857  	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1858  	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1859  	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1860  };
1861  
1862  enum rtr_id {
1863  	DCORE0_RTR0,
1864  	DCORE0_RTR1,
1865  	DCORE0_RTR2,
1866  	DCORE0_RTR3,
1867  	DCORE0_RTR4,
1868  	DCORE0_RTR5,
1869  	DCORE0_RTR6,
1870  	DCORE0_RTR7,
1871  	DCORE1_RTR0,
1872  	DCORE1_RTR1,
1873  	DCORE1_RTR2,
1874  	DCORE1_RTR3,
1875  	DCORE1_RTR4,
1876  	DCORE1_RTR5,
1877  	DCORE1_RTR6,
1878  	DCORE1_RTR7,
1879  	DCORE2_RTR0,
1880  	DCORE2_RTR1,
1881  	DCORE2_RTR2,
1882  	DCORE2_RTR3,
1883  	DCORE2_RTR4,
1884  	DCORE2_RTR5,
1885  	DCORE2_RTR6,
1886  	DCORE2_RTR7,
1887  	DCORE3_RTR0,
1888  	DCORE3_RTR1,
1889  	DCORE3_RTR2,
1890  	DCORE3_RTR3,
1891  	DCORE3_RTR4,
1892  	DCORE3_RTR5,
1893  	DCORE3_RTR6,
1894  	DCORE3_RTR7,
1895  };
1896  
1897  static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1898  	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1899  	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1900  	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1901  	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1902  	DCORE0_RTR0
1903  };
1904  
1905  static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1906  	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
1907  	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
1908  	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
1909  	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
1910  	DCORE0_RTR0
1911  };
1912  
1913  static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
1914  	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1915  	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1916  };
1917  
1918  static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
1919  	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
1920  	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
1921  };
1922  
1923  static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1924  	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1925  	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1926  };
1927  
1928  static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1929  	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1930  	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1931  };
1932  
1933  static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1934  	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1935  	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1936  	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1937  	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1938  	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1939  	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1940  	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1941  	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
1942  };
1943  
1944  static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
1945  	DCORE0_RTR0, DCORE0_RTR0
1946  };
1947  
1948  static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
1949  	DCORE0_RTR2, DCORE0_RTR2
1950  };
1951  
1952  static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
1953  	DCORE2_RTR0, DCORE3_RTR7
1954  };
1955  
1956  static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
1957  	DCORE2_RTR2, DCORE3_RTR5
1958  };
1959  
1960  struct mme_initiators_rtr_id {
1961  	u32 wap0;
1962  	u32 wap1;
1963  	u32 write;
1964  	u32 read;
1965  	u32 sbte0;
1966  	u32 sbte1;
1967  	u32 sbte2;
1968  	u32 sbte3;
1969  	u32 sbte4;
1970  };
1971  
1972  enum mme_initiators {
1973  	MME_WAP0 = 0,
1974  	MME_WAP1,
1975  	MME_WRITE,
1976  	MME_READ,
1977  	MME_SBTE0,
1978  	MME_SBTE1,
1979  	MME_SBTE2,
1980  	MME_SBTE3,
1981  	MME_SBTE4,
1982  	MME_INITIATORS_MAX
1983  };
1984  
1985  static const struct mme_initiators_rtr_id
1986  gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1987  	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1988  	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1989  	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1990  	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1991  	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1992  	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1993  	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1994  	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1995  };
1996  
1997  enum razwi_event_sources {
1998  	RAZWI_TPC,
1999  	RAZWI_MME,
2000  	RAZWI_EDMA,
2001  	RAZWI_PDMA,
2002  	RAZWI_NIC,
2003  	RAZWI_DEC,
2004  	RAZWI_ROT
2005  };
2006  
2007  struct hbm_mc_error_causes {
2008  	u32 mask;
2009  	char cause[50];
2010  };
2011  
2012  static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2013  
2014  /* The special blocks iterator is currently used to configure security protection bits
2015   * and to read global errors. Most HW blocks are addressable; those that aren't (N/A)
2016   * must be skipped. The following configurations are commonly used for both PB config
2017   * and global error reading, since currently they both share the same settings.
2018   * Once that changes, we must remember to use separate configurations for each one.
2019   */
2020  static int gaudi2_iterator_skip_block_types[] = {
2021  		GAUDI2_BLOCK_TYPE_PLL,
2022  		GAUDI2_BLOCK_TYPE_EU_BIST,
2023  		GAUDI2_BLOCK_TYPE_HBM,
2024  		GAUDI2_BLOCK_TYPE_XFT
2025  };
2026  
2027  static struct range gaudi2_iterator_skip_block_ranges[] = {
2028  		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2029  		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2030  		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2031  		/* Skip all CPU blocks except for CPU_IF */
2032  		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2033  		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2034  };
2035  
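/*
 * Illustrative sketch only (not part of the driver): given the tables above,
 * a block is presumably skipped by the special blocks iterator when its type
 * appears in gaudi2_iterator_skip_block_types or its base address falls inside
 * one of gaudi2_iterator_skip_block_ranges. The helper below, including its
 * name and (type, address) parameters, is an assumption added for illustration;
 * the actual check is implemented by the common special blocks iterator code.
 */
static inline bool __maybe_unused gaudi2_iterator_would_skip_block(int block_type, u32 block_addr)
{
	int i;

	/* skip by block type (PLL, EU_BIST, HBM, XFT) */
	for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_types) ; i++)
		if (block_type == gaudi2_iterator_skip_block_types[i])
			return true;

	/* skip by address range (non-addressable PSOC/CPU sub-blocks) */
	for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
		if (block_addr >= gaudi2_iterator_skip_block_ranges[i].start &&
				block_addr <= gaudi2_iterator_skip_block_ranges[i].end)
			return true;

	return false;
}
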
2036  static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2037  	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2038  	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2039  	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2040  	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2041  	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2042  };
2043  
2044  static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2045  	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2046  	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2047  	[HBM_SEI_READ_ERR] = "SEI read data error",
2048  	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2049  	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2050  	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2051  	[HBM_SEI_DFI] = "SEI DFI error",
2052  	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2053  	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2054  };
2055  
2056  struct mmu_spi_sei_cause {
2057  	char cause[50];
2058  	int clear_bit;
2059  };
2060  
2061  static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2062  	{"page fault", 1},		/* INTERRUPT_CLR[1] */
2063  	{"page access", 1},		/* INTERRUPT_CLR[1] */
2064  	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
2065  	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
2066  	{"mmu rei0", -1},		/* no clear register bit */
2067  	{"mmu rei1", -1},		/* no clear register bit */
2068  	{"stlb rei0", -1},		/* no clear register bit */
2069  	{"stlb rei1", -1},		/* no clear register bit */
2070  	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
2071  	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
2072  	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
2073  	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
2074  	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2075  	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2076  	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2077  	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2078  	{"slave error", 16},		/* INTERRUPT_CLR[16] */
2079  	{"dec error", 17},		/* INTERRUPT_CLR[17] */
2080  	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
2081  };
2082  
2083  struct gaudi2_cache_invld_params {
2084  	u64 start_va;
2085  	u64 end_va;
2086  	u32 inv_start_val;
2087  	u32 flags;
2088  	bool range_invalidation;
2089  };
2090  
2091  struct gaudi2_tpc_idle_data {
2092  	struct engines_data *e;
2093  	unsigned long *mask;
2094  	bool *is_idle;
2095  	const char *tpc_fmt;
2096  };
2097  
2098  struct gaudi2_tpc_mmu_data {
2099  	u32 rw_asid;
2100  };
2101  
2102  static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2103  
2104  static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2105  static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2106  static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2107  static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2108  static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2109  static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2110  static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2111  										bool is_memset);
2112  static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2113  		struct engines_data *e);
2114  static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2115  		struct engines_data *e);
2116  static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2117  		struct engines_data *e);
2118  static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2119  static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2120  
2121  static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2122  {
2123  
2124  }
2125  
2126  static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2127  {
2128  	return sizeof(struct packet_msg_short);
2129  }
2130  
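/*
 * A wait CB is sized as four MSG_SHORT packets plus one FENCE packet - likely
 * the monitor setup writes followed by the fence on which the wait is actually
 * performed. This breakdown is an assumption; only the total size matters here.
 */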
2131  static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2132  {
2133  	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2134  }
2135  
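/*
 * Iterate over all enabled TPC engines: walk the NUM_OF_DCORES x
 * NUM_OF_TPC_PER_DCORE instances, invoking ctx->fn() with the per-instance
 * register offset for every TPC whose bit is set in tpc_enabled_mask, and then
 * handle the PCI TPC (DCORE0_TPC6) separately. Iteration stops on the first
 * error reported through ctx->rc.
 */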
2136  void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2137  {
2138  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2139  	int dcore, inst, tpc_seq;
2140  	u32 offset;
2141  
2142  	/* init the return code */
2143  	ctx->rc = 0;
2144  
2145  	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2146  		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2147  			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2148  
2149  			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2150  				continue;
2151  
2152  			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2153  
2154  			ctx->fn(hdev, dcore, inst, offset, ctx);
2155  			if (ctx->rc) {
2156  				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2157  							dcore, inst);
2158  				return;
2159  			}
2160  		}
2161  	}
2162  
2163  	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2164  		return;
2165  
2166  	/* special check for PCI TPC (DCORE0_TPC6) */
2167  	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2168  	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2169  	if (ctx->rc)
2170  		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2171  }
2172  
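/*
 * A host physical address is considered valid if it lies below the end of the
 * first host physical range or at/above the base of the second one; only
 * addresses in the gap between the two ranges are rejected.
 */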
2173  static bool gaudi2_host_phys_addr_valid(u64 addr)
2174  {
2175  	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2176  		return true;
2177  
2178  	return false;
2179  }
2180  
2181  static int set_number_of_functional_hbms(struct hl_device *hdev)
2182  {
2183  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2184  	u8 faulty_hbms = hweight64(hdev->dram_binning);
2185  
2186  	/* check if all HBMs should be used */
2187  	if (!faulty_hbms) {
2188  		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
2189  		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2190  		return 0;
2191  	}
2192  
2193  	/*
2194  	 * check for the error condition in which the number of binning
2195  	 * candidates is higher than the maximum supported by the
2196  	 * driver (in which case the binning mask shall be ignored and the
2197  	 * driver will set the default)
2198  	 */
2199  	if (faulty_hbms > MAX_FAULTY_HBMS) {
2200  		dev_err(hdev->dev,
2201  			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2202  			MAX_FAULTY_HBMS, hdev->dram_binning);
2203  		return -EINVAL;
2204  	}
2205  
2206  	/*
2207  	 * The number of functional HBMs is the total minus the number of faulty
2208  	 * (binned-out) HBMs; with the default binning this is GAUDI2_HBM_NUM - 1.
2209  	 */
2210  	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2211  	return 0;
2212  }
2213  
2214  static int gaudi2_set_dram_properties(struct hl_device *hdev)
2215  {
2216  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2217  	u32 basic_hbm_page_size;
2218  	int rc;
2219  
2220  	rc = set_number_of_functional_hbms(hdev);
2221  	if (rc)
2222  		return -EINVAL;
2223  
2224  	/*
2225  	 * Due to a HW bug in which the TLB is x16 smaller than expected, we use a workaround
2226  	 * of an x16 bigger page size so that the entire HBM mapping can be populated
2227  	 * in the TLB
2228  	 */
2229  	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2230  	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
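	/*
	 * Worked example (assuming the x16 compensation factor noted above):
	 * with N functional HBMs the basic page is N * 8MB, so the effective
	 * DRAM page size becomes 16 * N * 8MB = N * 128MB.
	 */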
2231  	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2232  	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2233  	prop->dram_base_address = DRAM_PHYS_BASE;
2234  	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2235  	prop->dram_supports_virtual_memory = true;
2236  
2237  	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2238  	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2239  	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2240  	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2241  
2242  	/* since DRAM page size differs from DMMU page size we need to allocate
2243  	 * DRAM memory in units of dram_page size and map this memory in
2244  	 * units of DMMU page size. We overcome this size mismatch using a
2245  	 * scrambling routine which takes a DRAM page and converts it to a DMMU
2246  	 * page.
2247  	 * We therefore:
2248  	 * 1. partition the virtual address space to DRAM-page (whole) pages.
2249  	 *    (suppose we get n such pages)
2250  	 * 2. limit the amount of virtual address space we got from 1 above to
2251  	 *    a multiple of 64M as we don't want the scrambled address to cross
2252  	 *    the DRAM virtual address space.
2253  	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
2254  	 * 3. determine the end address accordingly:
2255  	 *    end_addr = start_addr + m * 48M
2256  	 *
2257  	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
2258  	 */
2259  	prop->dmmu.start_addr = prop->dram_base_address +
2260  			(prop->dram_page_size *
2261  				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2262  
2263  	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2264  			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2265  
2266  	return 0;
2267  }
2268  
2269  static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2270  {
2271  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2272  	struct hw_queue_properties *q_props;
2273  	u32 num_sync_stream_queues = 0;
2274  	int i;
2275  
2276  	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2277  	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2278  					GFP_KERNEL);
2279  
2280  	if (!prop->hw_queues_props)
2281  		return -ENOMEM;
2282  
2283  	q_props = prop->hw_queues_props;
2284  
2285  	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2286  		q_props[i].type = QUEUE_TYPE_HW;
2287  		q_props[i].driver_only = 0;
2288  
2289  		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2290  			q_props[i].supports_sync_stream = 0;
2291  		} else {
2292  			q_props[i].supports_sync_stream = 1;
2293  			num_sync_stream_queues++;
2294  		}
2295  
2296  		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2297  	}
2298  
2299  	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2300  	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2301  	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2302  
2303  	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2304  	prop->cfg_base_address = CFG_BASE;
2305  	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2306  	prop->host_base_address = HOST_PHYS_BASE_0;
2307  	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2308  	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2309  	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2310  	prop->user_dec_intr_count = NUMBER_OF_DEC;
2311  	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2312  	prop->completion_mode = HL_COMPLETION_MODE_CS;
2313  	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2314  	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2315  
2316  	prop->sram_base_address = SRAM_BASE_ADDR;
2317  	prop->sram_size = SRAM_SIZE;
2318  	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2319  	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2320  
2321  	prop->hints_range_reservation = true;
2322  
2323  	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2324  
2325  	if (hdev->pldm)
2326  		prop->mmu_pgt_size = 0x800000; /* 8MB */
2327  	else
2328  		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2329  
2330  	prop->mmu_pte_size = HL_PTE_SIZE;
2331  	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2332  	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2333  
2334  	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2335  	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2336  	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2337  	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2338  	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2339  	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2340  	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2341  	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2342  	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2343  	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2344  	prop->dmmu.page_size = PAGE_SIZE_1GB;
2345  	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2346  	prop->dmmu.last_mask = LAST_MASK;
2347  	prop->dmmu.host_resident = 1;
2348  	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2349  	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2350  
2351  	/*
2352  	 * This is done in order to be able to validate the FW descriptor (i.e. validating that
2353  	 * the addresses and the allocated space for the FW image do not cross memory bounds).
2354  	 * For this reason we set the DRAM size to the minimum possible and later it will
2355  	 * be modified according to what is reported in the cpucp info packet
2356  	 */
2357  	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2358  
2359  	hdev->pmmu_huge_range = true;
2360  	prop->pmmu.host_resident = 1;
2361  	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2362  	prop->pmmu.last_mask = LAST_MASK;
2363  	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2364  	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2365  
2366  	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2367  	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2368  	prop->hints_host_hpage_reserved_va_range.start_addr =
2369  			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2370  	prop->hints_host_hpage_reserved_va_range.end_addr =
2371  			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2372  
2373  	if (PAGE_SIZE == SZ_64K) {
2374  		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2375  		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2376  		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2377  		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2378  		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2379  		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2380  		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2381  		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2382  		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2383  		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2384  		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2385  		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2386  		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2387  		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2388  		prop->pmmu.page_size = PAGE_SIZE_64KB;
2389  
2390  		/* shifts and masks are the same in PMMU and HPMMU */
2391  		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2392  		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2393  		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2394  		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2395  	} else {
2396  		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2397  		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2398  		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2399  		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2400  		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2401  		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2402  		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2403  		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2404  		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2405  		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2406  		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2407  		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2408  		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2409  		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2410  		prop->pmmu.page_size = PAGE_SIZE_4KB;
2411  
2412  		/* shifts and masks are the same in PMMU and HPMMU */
2413  		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2414  		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2415  		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2416  		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2417  	}
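	/*
	 * In other words, the host-MMU page sizes above simply track the
	 * kernel PAGE_SIZE: a 4K kernel maps host memory with 4KB PMMU pages
	 * and 2MB huge pages, while a 64K kernel uses 64KB and 16MB pages
	 * with the matching hop shifts/masks.
	 */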
2418  
2419  	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2420  	prop->num_engine_cores = CPU_ID_MAX;
2421  	prop->cfg_size = CFG_SIZE;
2422  	prop->max_asid = MAX_ASID;
2423  	prop->num_of_events = GAUDI2_EVENT_SIZE;
2424  
2425  	prop->supports_engine_modes = true;
2426  
2427  	prop->dc_power_default = DC_POWER_DEFAULT;
2428  
2429  	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2430  	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2431  	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2432  	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2433  
2434  	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2435  
2436  	prop->mme_master_slave_mode = 1;
2437  
2438  	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2439  					(num_sync_stream_queues * HL_RSVD_SOBS);
2440  
2441  	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2442  					(num_sync_stream_queues * HL_RSVD_MONS);
2443  
2444  	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2445  	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2446  	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2447  
2448  	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2449  
2450  	prop->fw_cpu_boot_dev_sts0_valid = false;
2451  	prop->fw_cpu_boot_dev_sts1_valid = false;
2452  	prop->hard_reset_done_by_fw = false;
2453  	prop->gic_interrupts_enable = true;
2454  
2455  	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2456  
2457  	prop->max_dec = NUMBER_OF_DEC;
2458  
2459  	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2460  
2461  	prop->dma_mask = 64;
2462  
2463  	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2464  
2465  	return 0;
2466  }
2467  
2468  static int gaudi2_pci_bars_map(struct hl_device *hdev)
2469  {
2470  	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2471  	bool is_wc[3] = {false, false, true};
2472  	int rc;
2473  
2474  	rc = hl_pci_bars_map(hdev, name, is_wc);
2475  	if (rc)
2476  		return rc;
2477  
2478  	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2479  
2480  	return 0;
2481  }
2482  
2483  static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2484  {
2485  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2486  	struct hl_inbound_pci_region pci_region;
2487  	u64 old_addr = addr;
2488  	int rc;
2489  
2490  	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2491  		return old_addr;
2492  
2493  	if (hdev->asic_prop.iatu_done_by_fw)
2494  		return U64_MAX;
2495  
2496  	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2497  	pci_region.mode = PCI_BAR_MATCH_MODE;
2498  	pci_region.bar = DRAM_BAR_ID;
2499  	pci_region.addr = addr;
2500  	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2501  	if (rc)
2502  		return U64_MAX;
2503  
2504  	if (gaudi2) {
2505  		old_addr = gaudi2->dram_bar_cur_addr;
2506  		gaudi2->dram_bar_cur_addr = addr;
2507  	}
2508  
2509  	return old_addr;
2510  }
2511  
2512  static int gaudi2_init_iatu(struct hl_device *hdev)
2513  {
2514  	struct hl_inbound_pci_region inbound_region;
2515  	struct hl_outbound_pci_region outbound_region;
2516  	u32 bar_addr_low, bar_addr_high;
2517  	int rc;
2518  
2519  	if (hdev->asic_prop.iatu_done_by_fw)
2520  		return 0;
2521  
2522  	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2523  	 * We must map this region in BAR match mode in order to
2524  	 * fetch BAR physical base address
2525  	 */
2526  	inbound_region.mode = PCI_BAR_MATCH_MODE;
2527  	inbound_region.bar = SRAM_CFG_BAR_ID;
2528  	/* Base address must be aligned to Bar size which is 256 MB */
2529  	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2530  	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2531  	if (rc)
2532  		return rc;
2533  
2534  	/* Fetch physical BAR address */
2535  	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2536  	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2537  
2538  	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2539  
2540  	/* Inbound Region 0 - Bar 0 - Point to CFG */
2541  	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2542  	inbound_region.bar = SRAM_CFG_BAR_ID;
2543  	inbound_region.offset_in_bar = 0;
2544  	inbound_region.addr = STM_FLASH_BASE_ADDR;
2545  	inbound_region.size = CFG_REGION_SIZE;
2546  	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2547  	if (rc)
2548  		return rc;
2549  
2550  	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2551  	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2552  	inbound_region.bar = SRAM_CFG_BAR_ID;
2553  	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2554  	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2555  	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2556  	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2557  	if (rc)
2558  		return rc;
2559  
2560  	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2561  	inbound_region.mode = PCI_BAR_MATCH_MODE;
2562  	inbound_region.bar = DRAM_BAR_ID;
2563  	inbound_region.addr = DRAM_PHYS_BASE;
2564  	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2565  	if (rc)
2566  		return rc;
2567  
2568  	/* Outbound Region 0 - Point to Host */
2569  	outbound_region.addr = HOST_PHYS_BASE_0;
2570  	outbound_region.size = HOST_PHYS_SIZE_0;
2571  	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2572  
2573  	return rc;
2574  }
2575  
2576  static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2577  {
2578  	return RREG32(mmHW_STATE);
2579  }
2580  
2581  static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2582  {
2583  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2584  
2585  	/*
2586  	 * check for error condition in which number of binning candidates
2587  	 * is higher than the maximum supported by the driver
2588  	 */
2589  	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2590  		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2591  					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2592  					hdev->tpc_binning);
2593  		return -EINVAL;
2594  	}
2595  
2596  	prop->tpc_binning_mask = hdev->tpc_binning;
2597  	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2598  
2599  	return 0;
2600  }
2601  
2602  static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2603  {
2604  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2605  	struct hw_queue_properties *q_props = prop->hw_queues_props;
2606  	u64 tpc_binning_mask;
2607  	u8 subst_idx = 0;
2608  	int i, rc;
2609  
2610  	rc = gaudi2_tpc_binning_init_prop(hdev);
2611  	if (rc)
2612  		return rc;
2613  
2614  	tpc_binning_mask = prop->tpc_binning_mask;
2615  
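	/*
	 * For illustration: for each bit set in the mask the loop below picks
	 * a substitute TPC (DCORE0_TPC6 for the first faulty TPC, DCORE3_TPC5
	 * for a second one), clears the substitute rather than the faulty TPC
	 * from tpc_enabled_mask, and marks the substitute's four queues as
	 * binned.
	 */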
2616  	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2617  		u8 subst_seq, binned, qid_base;
2618  
2619  		if (tpc_binning_mask == 0)
2620  			break;
2621  
2622  		if (subst_idx == 0) {
2623  			subst_seq = TPC_ID_DCORE0_TPC6;
2624  			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2625  		} else {
2626  			subst_seq = TPC_ID_DCORE3_TPC5;
2627  			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2628  		}
2629  
2630  
2631  		/* clear bit from mask */
2632  		binned = __ffs(tpc_binning_mask);
2633  		/*
2634  		 * Coverity complains about possible out-of-bound access in
2635  		 * clear_bit
2636  		 */
2637  		if (binned >= TPC_ID_SIZE) {
2638  			dev_err(hdev->dev,
2639  				"Invalid binned TPC (binning mask: %llx)\n",
2640  				tpc_binning_mask);
2641  			return -EINVAL;
2642  		}
2643  		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2644  
2645  		/* also clear replacing TPC bit from enabled mask */
2646  		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2647  
2648  		/* bin substitute TPC's queues */
2649  		q_props[qid_base].binned = 1;
2650  		q_props[qid_base + 1].binned = 1;
2651  		q_props[qid_base + 2].binned = 1;
2652  		q_props[qid_base + 3].binned = 1;
2653  
2654  		subst_idx++;
2655  	}
2656  
2657  	return 0;
2658  }
2659  
2660  static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2661  {
2662  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2663  	u8 num_faulty;
2664  
2665  	num_faulty = hweight32(hdev->decoder_binning);
2666  
2667  	/*
2668  	 * check for error condition in which number of binning candidates
2669  	 * is higher than the maximum supported by the driver
2670  	 */
2671  	if (num_faulty > MAX_FAULTY_DECODERS) {
2672  		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2673  						hdev->decoder_binning);
2674  		return -EINVAL;
2675  	}
2676  
2677  	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2678  
2679  	if (prop->decoder_binning_mask)
2680  		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2681  	else
2682  		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2683  
2684  	return 0;
2685  }
2686  
2687  static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2688  {
2689  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2690  
2691  	/* check if we should override default binning */
2692  	if (!hdev->dram_binning) {
2693  		prop->dram_binning_mask = 0;
2694  		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2695  		return;
2696  	}
2697  
2698  	/* set DRAM binning constraints */
2699  	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2700  	prop->dram_binning_mask = hdev->dram_binning;
2701  	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2702  }
2703  
2704  static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2705  {
2706  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2707  	struct hw_queue_properties *q_props;
2708  	u8 seq, num_faulty;
2709  
2710  	num_faulty = hweight32(hdev->edma_binning);
2711  
2712  	/*
2713  	 * check for error condition in which number of binning candidates
2714  	 * is higher than the maximum supported by the driver
2715  	 */
2716  	if (num_faulty > MAX_FAULTY_EDMAS) {
2717  		dev_err(hdev->dev,
2718  			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2719  			hdev->edma_binning);
2720  		return -EINVAL;
2721  	}
2722  
2723  	if (!hdev->edma_binning) {
2724  		prop->edma_binning_mask = 0;
2725  		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2726  		return 0;
2727  	}
2728  
2729  	seq = __ffs((unsigned long)hdev->edma_binning);
2730  
2731  	/* set binning constraints */
2732  	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2733  	prop->edma_binning_mask = hdev->edma_binning;
2734  	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2735  
2736  	/* bin substitute EDMA's queue */
2737  	q_props = prop->hw_queues_props;
2738  	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2739  	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2740  	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2741  	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2742  
2743  	return 0;
2744  }
2745  
2746  static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2747  {
2748  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2749  	u8 num_faulty, seq;
2750  
2751  	/* check if we should override default binning */
2752  	if (!xbar_edge_iso_mask) {
2753  		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2754  		return 0;
2755  	}
2756  
2757  	/*
2758  	 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2759  	 * only the FW can set a redundancy value). For the user it will always be 0.
2760  	 */
2761  	num_faulty = hweight32(xbar_edge_iso_mask);
2762  
2763  	/*
2764  	 * check for error condition in which number of binning candidates
2765  	 * is higher than the maximum supported by the driver
2766  	 */
2767  	if (num_faulty > MAX_FAULTY_XBARS) {
2768  		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2769  									MAX_FAULTY_XBARS);
2770  		return -EINVAL;
2771  	}
2772  
2773  	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2774  
2775  	/* set binning constraints */
2776  	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2777  	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2778  
2779  	return 0;
2780  }
2781  
2782  static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2783  {
2784  	int rc;
2785  
2786  	/*
2787  	 * mark all clusters as good; each component will "fail" its cluster
2788  	 * based on eFuse/user values.
2789  	 * If more than a single cluster is faulty, the chip is unusable
2790  	 */
2791  	hdev->asic_prop.faulty_dram_cluster_map = 0;
2792  
2793  	gaudi2_set_dram_binning_masks(hdev);
2794  
2795  	rc = gaudi2_set_edma_binning_masks(hdev);
2796  	if (rc)
2797  		return rc;
2798  
2799  	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2800  	if (rc)
2801  		return rc;
2802  
2803  
2804  	/* always initially set to full mask */
2805  	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2806  
2807  	return 0;
2808  }
2809  
2810  static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2811  {
2812  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2813  	int rc;
2814  
2815  	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2816  	if (rc)
2817  		return rc;
2818  
2819  	/* if we have DRAM binning reported by the FW we should perform cluster config */
2820  	if (prop->faulty_dram_cluster_map) {
2821  		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2822  
2823  		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2824  	}
2825  
2826  	return 0;
2827  }
2828  
2829  static int gaudi2_set_binning_masks(struct hl_device *hdev)
2830  {
2831  	int rc;
2832  
2833  	rc = gaudi2_set_cluster_binning_masks(hdev);
2834  	if (rc)
2835  		return rc;
2836  
2837  	rc = gaudi2_set_tpc_binning_masks(hdev);
2838  	if (rc)
2839  		return rc;
2840  
2841  	rc = gaudi2_set_dec_binning_masks(hdev);
2842  	if (rc)
2843  		return rc;
2844  
2845  	return 0;
2846  }
2847  
2848  static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2849  {
2850  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2851  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2852  	long max_power;
2853  	u64 dram_size;
2854  	int rc;
2855  
2856  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2857  		return 0;
2858  
2859  	/* No point in asking for this information again when not doing a hard reset, as the device
2860  	 * CPU hasn't been reset
2861  	 */
2862  	if (hdev->reset_info.in_compute_reset)
2863  		return 0;
2864  
2865  	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2866  										mmCPU_BOOT_ERR1);
2867  	if (rc)
2868  		return rc;
2869  
2870  	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2871  	if (dram_size) {
2872  		/* we can have either 5 or 6 HBMs. other values are invalid */
2873  
2874  		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2875  					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2876  			dev_err(hdev->dev,
2877  				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2878  				dram_size, prop->dram_size);
2879  			dram_size = prop->dram_size;
2880  		}
2881  
2882  		prop->dram_size = dram_size;
2883  		prop->dram_end_address = prop->dram_base_address + dram_size;
2884  	}
2885  
2886  	if (!strlen(prop->cpucp_info.card_name))
2887  		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2888  
2889  	/* Overwrite binning masks with the actual binning values from F/W */
2890  	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2891  	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2892  	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2893  	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2894  
2895  	dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2896  			hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2897  			hdev->decoder_binning);
2898  
2899  	/*
2900  	 * at this point the DRAM parameters need to be updated according to data obtained
2901  	 * from the FW
2902  	 */
2903  	rc = hdev->asic_funcs->set_dram_properties(hdev);
2904  	if (rc)
2905  		return rc;
2906  
2907  	rc = hdev->asic_funcs->set_binning_masks(hdev);
2908  	if (rc)
2909  		return rc;
2910  
2911  	max_power = hl_fw_get_max_power(hdev);
2912  	if (max_power < 0)
2913  		return max_power;
2914  
2915  	prop->max_power_default = (u64) max_power;
2916  
2917  	return 0;
2918  }
2919  
2920  static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2921  {
2922  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2923  	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2924  	int rc;
2925  
2926  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2927  		return 0;
2928  
2929  	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2930  	if (rc)
2931  		return rc;
2932  
2933  	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2934  
2935  	return 0;
2936  }
2937  
2938  static int gaudi2_early_init(struct hl_device *hdev)
2939  {
2940  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2941  	struct pci_dev *pdev = hdev->pdev;
2942  	resource_size_t pci_bar_size;
2943  	int rc;
2944  
2945  	rc = gaudi2_set_fixed_properties(hdev);
2946  	if (rc)
2947  		return rc;
2948  
2949  	/* Check BAR sizes */
2950  	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2951  
2952  	if (pci_bar_size != CFG_BAR_SIZE) {
2953  		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2954  			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2955  		rc = -ENODEV;
2956  		goto free_queue_props;
2957  	}
2958  
2959  	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2960  	if (pci_bar_size != MSIX_BAR_SIZE) {
2961  		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2962  			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2963  		rc = -ENODEV;
2964  		goto free_queue_props;
2965  	}
2966  
2967  	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2968  	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2969  
2970  	/*
2971  	 * Only in pldm does the driver configure the iATU; otherwise it is done by the FW
2972  	 */
2973  	if (hdev->pldm)
2974  		hdev->asic_prop.iatu_done_by_fw = false;
2975  	else
2976  		hdev->asic_prop.iatu_done_by_fw = true;
2977  
2978  	rc = hl_pci_init(hdev);
2979  	if (rc)
2980  		goto free_queue_props;
2981  
2982  	/* Before continuing in the initialization, we need to read the preboot
2983  	 * version to determine whether we run with a security-enabled firmware
2984  	 */
2985  	rc = hl_fw_read_preboot_status(hdev);
2986  	if (rc) {
2987  		if (hdev->reset_on_preboot_fail)
2988  			/* we are already on failure flow, so don't check if hw_fini fails. */
2989  			hdev->asic_funcs->hw_fini(hdev, true, false);
2990  		goto pci_fini;
2991  	}
2992  
2993  	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2994  		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2995  		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2996  		if (rc) {
2997  			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2998  			goto pci_fini;
2999  		}
3000  	}
3001  
3002  	return 0;
3003  
3004  pci_fini:
3005  	hl_pci_fini(hdev);
3006  free_queue_props:
3007  	kfree(hdev->asic_prop.hw_queues_props);
3008  	return rc;
3009  }
3010  
3011  static int gaudi2_early_fini(struct hl_device *hdev)
3012  {
3013  	kfree(hdev->asic_prop.hw_queues_props);
3014  	hl_pci_fini(hdev);
3015  
3016  	return 0;
3017  }
3018  
3019  static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3020  {
3021  	switch (arc_id) {
3022  	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3023  		return true;
3024  	default:
3025  		return false;
3026  	}
3027  }
3028  
3029  static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3030  {
3031  	switch (arc_id) {
3032  	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3033  		return true;
3034  	default:
3035  		return false;
3036  	}
3037  }
3038  
3039  static void gaudi2_init_arcs(struct hl_device *hdev)
3040  {
3041  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3042  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3043  	u64 arc_id;
3044  	u32 i;
3045  
3046  	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3047  		if (gaudi2_is_arc_enabled(hdev, i))
3048  			continue;
3049  
3050  		gaudi2_set_arc_id_cap(hdev, i);
3051  	}
3052  
3053  	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3054  		if (!gaudi2_is_queue_enabled(hdev, i))
3055  			continue;
3056  
3057  		arc_id = gaudi2_queue_id_to_arc_id[i];
3058  		if (gaudi2_is_arc_enabled(hdev, arc_id))
3059  			continue;
3060  
3061  		if (gaudi2_is_arc_nic_owned(arc_id) &&
3062  				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3063  			continue;
3064  
3065  		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3066  							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3067  			continue;
3068  
3069  		gaudi2_set_arc_id_cap(hdev, arc_id);
3070  	}
3071  
3072  	/* Fetch ARC scratchpad address */
3073  	hdev->asic_prop.engine_core_interrupt_reg_addr =
3074  		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3075  }
3076  
3077  static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3078  {
3079  	u32 reg_base, reg_val;
3080  	int rc;
3081  
3082  	switch (cpu_id) {
3083  	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3084  		/* Each ARC scheduler has 2 consecutive DCCM blocks */
3085  		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3086  						ARC_DCCM_BLOCK_SIZE * 2, true);
3087  		if (rc)
3088  			return rc;
3089  		break;
3090  	case CPU_ID_SCHED_ARC4:
3091  	case CPU_ID_SCHED_ARC5:
3092  	case CPU_ID_MME_QMAN_ARC0:
3093  	case CPU_ID_MME_QMAN_ARC1:
3094  		reg_base = gaudi2_arc_blocks_bases[cpu_id];
3095  
3096  		/* Scrub lower DCCM block */
3097  		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3098  						ARC_DCCM_BLOCK_SIZE, true);
3099  		if (rc)
3100  			return rc;
3101  
3102  		/* Switch to upper DCCM block */
3103  		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3104  		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3105  
3106  		/* Scrub upper DCCM block */
3107  		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3108  						ARC_DCCM_BLOCK_SIZE, true);
3109  		if (rc)
3110  			return rc;
3111  
3112  		/* Switch to lower DCCM block */
3113  		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3114  		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3115  		break;
3116  	default:
3117  		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3118  						ARC_DCCM_BLOCK_SIZE, true);
3119  		if (rc)
3120  			return rc;
3121  	}
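	/*
	 * Note on the dual-block case above: the same DCCM base address is
	 * scrubbed twice on purpose. Toggling the UPPER_DCCM_EN field
	 * presumably selects which of the two DCCM blocks is exposed through
	 * that address window, so the second KDMA job scrubs the upper block
	 * and the final write restores the lower block mapping.
	 */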
3122  
3123  	return 0;
3124  }
3125  
3126  static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3127  {
3128  	u16 arc_id;
3129  	int rc;
3130  
3131  	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3132  		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3133  			continue;
3134  
3135  		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3136  		if (rc)
3137  			return rc;
3138  	}
3139  
3140  	return 0;
3141  }
3142  
3143  static int gaudi2_late_init(struct hl_device *hdev)
3144  {
3145  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3146  	int rc;
3147  
3148  	hdev->asic_prop.supports_advanced_cpucp_rc = true;
3149  
3150  	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3151  					gaudi2->virt_msix_db_dma_addr);
3152  	if (rc) {
3153  		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3154  		return rc;
3155  	}
3156  
3157  	rc = gaudi2_fetch_psoc_frequency(hdev);
3158  	if (rc) {
3159  		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3160  		goto disable_pci_access;
3161  	}
3162  
3163  	gaudi2_init_arcs(hdev);
3164  
3165  	rc = gaudi2_scrub_arcs_dccm(hdev);
3166  	if (rc) {
3167  		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3168  		goto disable_pci_access;
3169  	}
3170  
3171  	gaudi2_init_security(hdev);
3172  
3173  	return 0;
3174  
3175  disable_pci_access:
3176  	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3177  
3178  	return rc;
3179  }
3180  
3181  static void gaudi2_late_fini(struct hl_device *hdev)
3182  {
3183  	hl_hwmon_release_resources(hdev);
3184  }
3185  
3186  static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3187  {
3188  	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3189  
3190  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3191  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3192  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3193  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3194  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3195  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3196  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3197  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3198  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3199  	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3200  }
3201  
3202  static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3203  {
3204  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3205  	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3206  	u32 block_size, umr_start_idx, num_umr_blocks;
3207  	int i;
3208  
3209  	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3210  		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3211  			block_size = ARC_DCCM_BLOCK_SIZE * 2;
3212  		else
3213  			block_size = ARC_DCCM_BLOCK_SIZE;
3214  
3215  		blocks[i].address = gaudi2_arc_dccm_bases[i];
3216  		blocks[i].size = block_size;
3217  	}
3218  
3219  	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3220  	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3221  
3222  	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3223  	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3224  
3225  	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3226  	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3227  
3228  	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3229  	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3230  
3231  	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3232  	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3233  
3234  	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3235  	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3236  
3237  	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3238  	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3239  
3240  	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3241  	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3242  
3243  	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3244  	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3245  	for (i = 0 ; i < num_umr_blocks ; i++) {
3246  		u8 nic_id, umr_block_id;
3247  
3248  		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3249  		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3250  
3251  		blocks[umr_start_idx + i].address =
3252  			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3253  			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3254  			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3255  			umr_block_id * NIC_UMR_OFFSET;
3256  		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3257  	}
3258  
3259  	/* Expose decoder HW configuration block to user */
3260  	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3261  
3262  	for (i = 1; i < NUM_OF_DCORES; ++i) {
3263  		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3264  		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3265  
3266  		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3267  						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3268  
3269  		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3270  						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3271  	}
3272  }
3273  
3274  static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3275  {
3276  	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3277  	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3278  	int i, j, rc = 0;
3279  
3280  	/* The device ARC works with 32-bit addresses, and because there is a single HW register
3281  	 * that holds the extension bits (49..28), these bits must be identical in all the allocated
3282  	 * range.
3283  	 */
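	/*
	 * For illustration: bits 49..28 being shared means the allocation must
	 * not straddle a 2^28 = 256MB aligned boundary. The retry loop below
	 * keeps any allocation that crosses such a boundary held (so the next
	 * attempt lands elsewhere) and only frees the rejected ones at the
	 * end.
	 */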
3284  
3285  	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3286  		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3287  							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3288  		if (!virt_addr_arr[i]) {
3289  			rc = -ENOMEM;
3290  			goto free_dma_mem_arr;
3291  		}
3292  
3293  		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3294  		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3295  			break;
3296  	}
3297  
3298  	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3299  		dev_err(hdev->dev,
3300  			"MSB of ARC accessible DMA memory are not identical in all range\n");
3301  		rc = -EFAULT;
3302  		goto free_dma_mem_arr;
3303  	}
3304  
3305  	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3306  	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3307  
3308  free_dma_mem_arr:
3309  	for (j = 0 ; j < i ; j++)
3310  		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3311  						dma_addr_arr[j]);
3312  
3313  	return rc;
3314  }
3315  
3316  static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3317  {
3318  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3319  	struct pci_mem_region *region;
3320  
3321  	/* CFG */
3322  	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3323  	region->region_base = CFG_BASE;
3324  	region->region_size = CFG_SIZE;
3325  	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3326  	region->bar_size = CFG_BAR_SIZE;
3327  	region->bar_id = SRAM_CFG_BAR_ID;
3328  	region->used = 1;
3329  
3330  	/* SRAM */
3331  	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3332  	region->region_base = SRAM_BASE_ADDR;
3333  	region->region_size = SRAM_SIZE;
3334  	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3335  	region->bar_size = CFG_BAR_SIZE;
3336  	region->bar_id = SRAM_CFG_BAR_ID;
3337  	region->used = 1;
3338  
3339  	/* DRAM */
3340  	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3341  	region->region_base = DRAM_PHYS_BASE;
3342  	region->region_size = hdev->asic_prop.dram_size;
3343  	region->offset_in_bar = 0;
3344  	region->bar_size = prop->dram_pci_bar_size;
3345  	region->bar_id = DRAM_BAR_ID;
3346  	region->used = 1;
3347  }
3348  
3349  static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3350  {
3351  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3352  	int i, j, k;
3353  
3354  	/* Initialize TPC interrupt */
3355  	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3356  
3357  	/* Initialize unexpected error interrupt */
3358  	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3359  						HL_USR_INTERRUPT_UNEXPECTED);
3360  
3361  	/* Initialize common user CQ interrupt */
3362  	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3363  				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3364  
3365  	/* Initialize common decoder interrupt */
3366  	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3367  				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3368  
3369  	/* User interrupts structure holds both decoder and user interrupts from various engines.
3370  	 * We first initialize the decoder interrupts and then we add the user interrupts.
3371  	 * The only limitation is that the last decoder interrupt id must be smaller
3372  	 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3373  	 */
3374  
3375  	/* Initialize decoder interrupts; expose only the normal interrupts,
3376  	 * error interrupts are handled by the driver
3377  	 */
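	/* The loop below strides by 2 because each decoder exposes a pair of
	 * interrupt lines; only the NRM line of each pair is registered for
	 * user space, the other one presumably being the error line the
	 * driver keeps to itself.
	 */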
3378  	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3379  										i += 2, j++)
3380  		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3381  						HL_USR_INTERRUPT_DECODER);
3382  
3383  	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3384  		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3385  }
3386  
3387  static inline int gaudi2_get_non_zero_random_int(void)
3388  {
3389  	int rand = get_random_u32();
3390  
3391  	return rand ? rand : 1;
3392  }
3393  
3394  static void gaudi2_special_blocks_free(struct hl_device *hdev)
3395  {
3396  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3397  	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3398  			&prop->skip_special_blocks_cfg;
3399  
3400  	kfree(prop->special_blocks);
3401  	kfree(skip_special_blocks_cfg->block_types);
3402  	kfree(skip_special_blocks_cfg->block_ranges);
3403  }
3404  
3405  static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3406  {
3407  	gaudi2_special_blocks_free(hdev);
3408  }
3409  
3410  static bool gaudi2_special_block_skip(struct hl_device *hdev,
3411  		struct hl_special_blocks_cfg *special_blocks_cfg,
3412  		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3413  {
3414  	return false;
3415  }
3416  
3417  static int gaudi2_special_blocks_config(struct hl_device *hdev)
3418  {
3419  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3420  	int i, rc;
3421  
3422  	/* Configure Special blocks */
3423  	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3424  	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3425  	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3426  			sizeof(*prop->special_blocks), GFP_KERNEL);
3427  	if (!prop->special_blocks)
3428  		return -ENOMEM;
3429  
3430  	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3431  		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3432  				sizeof(*prop->special_blocks));
3433  
3434  	/* Configure when to skip Special blocks */
3435  	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3436  	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3437  
3438  	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3439  		prop->skip_special_blocks_cfg.block_types =
3440  				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3441  					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3442  		if (!prop->skip_special_blocks_cfg.block_types) {
3443  			rc = -ENOMEM;
3444  			goto free_special_blocks;
3445  		}
3446  
3447  		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3448  				sizeof(gaudi2_iterator_skip_block_types));
3449  
3450  		prop->skip_special_blocks_cfg.block_types_len =
3451  					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3452  	}
3453  
3454  	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3455  		prop->skip_special_blocks_cfg.block_ranges =
3456  				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3457  					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3458  		if (!prop->skip_special_blocks_cfg.block_ranges) {
3459  			rc = -ENOMEM;
3460  			goto free_skip_special_blocks_types;
3461  		}
3462  
3463  		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3464  			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3465  					&gaudi2_iterator_skip_block_ranges[i],
3466  					sizeof(struct range));
3467  
3468  		prop->skip_special_blocks_cfg.block_ranges_len =
3469  					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3470  	}
3471  
3472  	return 0;
3473  
3474  free_skip_special_blocks_types:
3475  	kfree(prop->skip_special_blocks_cfg.block_types);
3476  free_special_blocks:
3477  	kfree(prop->special_blocks);
3478  
3479  	return rc;
3480  }
3481  
3482  static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3483  {
3484  	return gaudi2_special_blocks_config(hdev);
3485  }
3486  
3487  static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3488  {
3489  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3490  	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3491  	int i;
3492  
3493  	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3494  		/* bail-out if this is an allocation failure point */
3495  		if (!msg_info[i].kern_addr)
3496  			break;
3497  
3498  		hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3499  		msg_info[i].kern_addr = NULL;
3500  	}
3501  }
3502  
3503  static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3504  {
3505  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3506  	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3507  	int i, rc;
3508  
3509  	/* allocate a message-short buf for each Q we intend to test */
3510  	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3511  		msg_info[i].kern_addr =
3512  			(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3513  							GFP_KERNEL, &msg_info[i].dma_addr);
3514  		if (!msg_info[i].kern_addr) {
3515  			dev_err(hdev->dev,
3516  				"Failed to allocate dma memory for H/W queue %d testing\n", i);
3517  			rc = -ENOMEM;
3518  			goto err_exit;
3519  		}
3520  	}
3521  
3522  	return 0;
3523  
3524  err_exit:
3525  	gaudi2_test_queues_msgs_free(hdev);
3526  	return rc;
3527  }
3528  
3529  static int gaudi2_sw_init(struct hl_device *hdev)
3530  {
3531  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3532  	struct gaudi2_device *gaudi2;
3533  	int i, rc;
3534  
3535  	/* Allocate device structure */
3536  	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3537  	if (!gaudi2)
3538  		return -ENOMEM;
3539  
3540  	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3541  		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3542  			continue;
3543  
3544  		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3545  			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3546  				GAUDI2_EVENT_SIZE);
3547  			rc = -EINVAL;
3548  			goto free_gaudi2_device;
3549  		}
3550  
3551  		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3552  	}
3553  
3554  	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3555  		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3556  
3557  	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3558  
3559  	hdev->asic_specific = gaudi2;
3560  
3561  	/* Create DMA pool for small allocations.
3562  	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3563  	 * PI/CI registers allocated from this pool have this restriction
3564  	 */
3565  	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3566  					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3567  	if (!hdev->dma_pool) {
3568  		dev_err(hdev->dev, "failed to create DMA pool\n");
3569  		rc = -ENOMEM;
3570  		goto free_gaudi2_device;
3571  	}
3572  
3573  	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3574  	if (rc)
3575  		goto free_dma_pool;
3576  
3577  	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3578  	if (!hdev->cpu_accessible_dma_pool) {
3579  		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3580  		rc = -ENOMEM;
3581  		goto free_cpu_dma_mem;
3582  	}
3583  
3584  	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3585  				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3586  	if (rc) {
3587  		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3588  		rc = -EFAULT;
3589  		goto free_cpu_accessible_dma_pool;
3590  	}
3591  
3592  	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3593  								&gaudi2->virt_msix_db_dma_addr);
3594  	if (!gaudi2->virt_msix_db_cpu_addr) {
3595  		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3596  		rc = -ENOMEM;
3597  		goto free_cpu_accessible_dma_pool;
3598  	}
3599  
3600  	spin_lock_init(&gaudi2->hw_queues_lock);
3601  
3602  	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3603  							&gaudi2->scratchpad_bus_address,
3604  							GFP_KERNEL | __GFP_ZERO);
3605  	if (!gaudi2->scratchpad_kernel_address) {
3606  		rc = -ENOMEM;
3607  		goto free_virt_msix_db_mem;
3608  	}
3609  
3610  	gaudi2_user_mapped_blocks_init(hdev);
3611  
3612  	/* Initialize user interrupts */
3613  	gaudi2_user_interrupt_setup(hdev);
3614  
3615  	hdev->supports_coresight = true;
3616  	hdev->supports_sync_stream = true;
3617  	hdev->supports_cb_mapping = true;
3618  	hdev->supports_wait_for_multi_cs = false;
3619  
3620  	prop->supports_compute_reset = true;
3621  
3622  	/* Event queue sanity check added in FW version 1.11 */
3623  	if (hl_is_fw_sw_ver_below(hdev, 1, 11))
3624  		hdev->event_queue.check_eqe_index = false;
3625  	else
3626  		hdev->event_queue.check_eqe_index = true;
3627  
3628  	hdev->asic_funcs->set_pci_memory_regions(hdev);
3629  
3630  	rc = gaudi2_special_blocks_iterator_config(hdev);
3631  	if (rc)
3632  		goto free_scratchpad_mem;
3633  
3634  	rc = gaudi2_test_queues_msgs_alloc(hdev);
3635  	if (rc)
3636  		goto special_blocks_free;
3637  
3638  	return 0;
3639  
3640  special_blocks_free:
3641  	gaudi2_special_blocks_iterator_free(hdev);
3642  free_scratchpad_mem:
3643  	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3644  				  gaudi2->scratchpad_bus_address);
3645  free_virt_msix_db_mem:
3646  	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3647  free_cpu_accessible_dma_pool:
3648  	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3649  free_cpu_dma_mem:
3650  	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3651  					hdev->cpu_accessible_dma_address);
3652  free_dma_pool:
3653  	dma_pool_destroy(hdev->dma_pool);
3654  free_gaudi2_device:
3655  	kfree(gaudi2);
3656  	return rc;
3657  }
3658  
3659  static int gaudi2_sw_fini(struct hl_device *hdev)
3660  {
3661  	struct asic_fixed_properties *prop = &hdev->asic_prop;
3662  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3663  
3664  	gaudi2_test_queues_msgs_free(hdev);
3665  
3666  	gaudi2_special_blocks_iterator_free(hdev);
3667  
3668  	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3669  
3670  	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3671  
3672  	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3673  						hdev->cpu_accessible_dma_address);
3674  
3675  	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3676  					gaudi2->scratchpad_bus_address);
3677  
3678  	dma_pool_destroy(hdev->dma_pool);
3679  
3680  	kfree(gaudi2);
3681  
3682  	return 0;
3683  }
3684  
3685  static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3686  {
3687  	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3688  						QM_GLBL_CFG1_CQF_STOP |
3689  						QM_GLBL_CFG1_CP_STOP);
3690  
3691  	/* stop also the ARC */
3692  	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3693  }
3694  
3695  static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3696  {
3697  	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3698  						QM_GLBL_CFG1_CQF_FLUSH |
3699  						QM_GLBL_CFG1_CP_FLUSH);
3700  }
3701  
3702  static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3703  {
3704  	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3705  }
3706  
3707  /**
3708   * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3709   *
3710   * @hdev: pointer to the habanalabs device structure
3711   * @queue_id: queue whose fence counters should be cleared
3712   * @skip_fence: if true, set the maximum fence value to all fence counters to avoid
3713   *              getting stuck on any fence value. otherwise set all fence
3714   *              counters to 0 (standard clear of fence counters)
3715   */
3716  static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3717  						bool skip_fence)
3718  {
3719  	u32 size, reg_base;
3720  	u32 addr, val;
3721  
3722  	reg_base = gaudi2_qm_blocks_bases[queue_id];
3723  
3724  	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3725  	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3726  
3727  	/*
3728  	 * in case we want to make sure that a QM that is stuck on a fence will
3729  	 * be released, we should set the fence counter to a higher value than
3730  	 * the value the QM is waiting for. To comply with any fence counter of
3731  	 * any value we set the maximum fence value to all counters
3732  	 */
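	/*
	 * For illustration: the LBW memset below covers every fence counter
	 * register of this QM's CPs, i.e. the whole range from
	 * CP_FENCE0_CNT_0 up to (but not including) CP_BARRIER_CFG, writing
	 * either 0 or U32_MAX to each of them.
	 */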
3733  	val = skip_fence ? U32_MAX : 0;
3734  	gaudi2_memset_device_lbw(hdev, addr, size, val);
3735  }
3736  
3737  static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3738  {
3739  	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3740  
3741  	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3742  	gaudi2_flush_qman_common(hdev, reg_base);
3743  	gaudi2_flush_qman_arc_common(hdev, reg_base);
3744  }
3745  
3746  static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3747  {
3748  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3749  	int dcore, inst;
3750  
3751  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3752  		goto stop_edma_qmans;
3753  
3754  	/* Stop CPs of PDMA QMANs */
3755  	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3756  	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3757  
3758  stop_edma_qmans:
3759  	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3760  		return;
3761  
3762  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3763  		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3764  			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3765  			u32 qm_base;
3766  
3767  			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3768  				continue;
3769  
3770  			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3771  					inst * DCORE_EDMA_OFFSET;
3772  
3773  			/* Stop CPs of EDMA QMANs */
3774  			gaudi2_stop_qman_common(hdev, qm_base);
3775  		}
3776  	}
3777  }
3778  
3779  static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3780  {
3781  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3782  	u32 offset, i;
3783  
3784  	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3785  
3786  	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3787  		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3788  			continue;
3789  
3790  		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3791  	}
3792  }
3793  
3794  static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3795  {
3796  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3797  	u32 reg_base;
3798  	int i;
3799  
3800  	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3801  		return;
3802  
3803  	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3804  		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3805  			continue;
3806  
3807  		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3808  		gaudi2_stop_qman_common(hdev, reg_base);
3809  	}
3810  }
3811  
3812  static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3813  {
3814  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3815  	u32 reg_base;
3816  	int i;
3817  
3818  	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3819  		return;
3820  
3821  	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3822  		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3823  			continue;
3824  
3825  		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3826  		gaudi2_stop_qman_common(hdev, reg_base);
3827  	}
3828  }
3829  
3830  static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3831  {
3832  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3833  	u32 reg_base, queue_id;
3834  	int i;
3835  
3836  	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3837  		return;
3838  
3839  	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3840  
3841  	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3842  		if (!(hdev->nic_ports_mask & BIT(i)))
3843  			continue;
3844  
3845  		reg_base = gaudi2_qm_blocks_bases[queue_id];
3846  		gaudi2_stop_qman_common(hdev, reg_base);
3847  	}
3848  }
3849  
3850  static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3851  {
3852  	u32 reg_val;
3853  
3854  	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3855  	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3856  }
3857  
3858  static void gaudi2_dma_stall(struct hl_device *hdev)
3859  {
3860  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3861  	int dcore, inst;
3862  
3863  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3864  		goto stall_edma;
3865  
3866  	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3867  	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3868  
3869  stall_edma:
3870  	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3871  		return;
3872  
3873  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3874  		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3875  			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3876  			u32 core_base;
3877  
3878  			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3879  				continue;
3880  
3881  			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3882  					inst * DCORE_EDMA_OFFSET;
3883  
3884  			/* Stall CPs of EDMA QMANs */
3885  			gaudi2_stall_dma_common(hdev, core_base);
3886  		}
3887  	}
3888  }
3889  
3890  static void gaudi2_mme_stall(struct hl_device *hdev)
3891  {
3892  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3893  	u32 offset, i;
3894  
3895  	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3896  
3897  	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3898  		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3899  			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3900  }
3901  
3902  static void gaudi2_tpc_stall(struct hl_device *hdev)
3903  {
3904  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3905  	u32 reg_base;
3906  	int i;
3907  
3908  	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3909  		return;
3910  
3911  	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3912  		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3913  			continue;
3914  
3915  		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3916  		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3917  	}
3918  }
3919  
3920  static void gaudi2_rotator_stall(struct hl_device *hdev)
3921  {
3922  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3923  	u32 reg_val;
3924  	int i;
3925  
3926  	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3927  		return;
3928  
3929  	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3930  			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3931  			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3932  
3933  	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3934  		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3935  			continue;
3936  
3937  		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3938  	}
3939  }
3940  
3941  static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3942  {
3943  	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3944  }
3945  
3946  static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3947  {
3948  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3949  	int dcore, inst;
3950  
3951  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3952  		goto stop_edma_qmans;
3953  
3954  	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3955  	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3956  
3957  stop_edma_qmans:
3958  	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3959  		return;
3960  
3961  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3962  		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3963  			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3964  			u32 qm_base;
3965  
3966  			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3967  				continue;
3968  
3969  			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3970  					inst * DCORE_EDMA_OFFSET;
3971  
3972  			/* Disable CPs of EDMA QMANs */
3973  			gaudi2_disable_qman_common(hdev, qm_base);
3974  		}
3975  	}
3976  }
3977  
3978  static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3979  {
3980  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3981  	u32 offset, i;
3982  
3983  	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3984  
3985  	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3986  		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3987  			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3988  }
3989  
3990  static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3991  {
3992  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3993  	u32 reg_base;
3994  	int i;
3995  
3996  	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3997  		return;
3998  
3999  	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4000  		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4001  			continue;
4002  
4003  		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4004  		gaudi2_disable_qman_common(hdev, reg_base);
4005  	}
4006  }
4007  
4008  static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4009  {
4010  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4011  	u32 reg_base;
4012  	int i;
4013  
4014  	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4015  		return;
4016  
4017  	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4018  		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4019  			continue;
4020  
4021  		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4022  		gaudi2_disable_qman_common(hdev, reg_base);
4023  	}
4024  }
4025  
4026  static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4027  {
4028  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4029  	u32 reg_base, queue_id;
4030  	int i;
4031  
4032  	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4033  		return;
4034  
4035  	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4036  
4037  	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4038  		if (!(hdev->nic_ports_mask & BIT(i)))
4039  			continue;
4040  
4041  		reg_base = gaudi2_qm_blocks_bases[queue_id];
4042  		gaudi2_disable_qman_common(hdev, reg_base);
4043  	}
4044  }
4045  
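/*
 * The timestamp counter is stopped before its two 32-bit halves (offsets 0x8
 * and 0xC of the block) are zeroed, so that re-enabling it at the end restarts
 * the 64-bit count from zero.
 */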
4046  static void gaudi2_enable_timestamp(struct hl_device *hdev)
4047  {
4048  	/* Disable the timestamp counter */
4049  	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4050  
4051  	/* Zero the lower/upper parts of the 64-bit counter */
4052  	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4053  	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4054  
4055  	/* Enable the counter */
4056  	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4057  }
4058  
4059  static void gaudi2_disable_timestamp(struct hl_device *hdev)
4060  {
4061  	/* Disable the timestamp counter */
4062  	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4063  }
4064  
4065  static const char *gaudi2_irq_name(u16 irq_number)
4066  {
4067  	switch (irq_number) {
4068  	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4069  		return "gaudi2 cpu eq";
4070  	case GAUDI2_IRQ_NUM_COMPLETION:
4071  		return "gaudi2 completion";
4072  	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4073  		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4074  	case GAUDI2_IRQ_NUM_TPC_ASSERT:
4075  		return "gaudi2 tpc assert";
4076  	case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4077  		return "gaudi2 unexpected error";
4078  	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4079  		return "gaudi2 user completion";
4080  	default:
4081  		return "invalid";
4082  	}
4083  }
4084  
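/*
 * Decoder MSI-X layout: each decoder owns two consecutive vectors starting at
 * GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM - an even "normal" vector, requested against
 * the matching hdev->user_interrupt entry, and an odd "abnormal" vector,
 * requested against the hl_dec itself. Both the free path below and the
 * request path in gaudi2_dec_enable_msix() use relative_idx % 2 to select the
 * cookie, so free_irq() is called with the same pointer that was passed to
 * request_irq().
 */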
4085  static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4086  {
4087  	int i, irq, relative_idx;
4088  	struct hl_dec *dec;
4089  
4090  	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4091  		irq = pci_irq_vector(hdev->pdev, i);
4092  		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4093  
4094  		dec = hdev->dec + relative_idx / 2;
4095  
4096  		/* We pass different structures depending on the irq handler. For the abnormal
4097  		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4098  		 * user_interrupt entry
4099  		 */
4100  		free_irq(irq, ((relative_idx % 2) ?
4101  				(void *) dec :
4102  				(void *) &hdev->user_interrupt[dec->core_id]));
4103  	}
4104  }
4105  
4106  static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4107  {
4108  	int rc, i, irq_init_cnt, irq, relative_idx;
4109  	struct hl_dec *dec;
4110  
4111  	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4112  			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4113  			i++, irq_init_cnt++) {
4114  
4115  		irq = pci_irq_vector(hdev->pdev, i);
4116  		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4117  
4118  		/* We pass different structures depending on the irq handler. For the abnormal
4119  		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4120  		 * user_interrupt entry
4121  		 *
4122  		 * TODO: change the dec abnrm to threaded irq
4123  		 */
4124  
4125  		dec = hdev->dec + relative_idx / 2;
4126  		if (relative_idx % 2) {
4127  			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4128  						gaudi2_irq_name(i), (void *) dec);
4129  		} else {
4130  			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4131  					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4132  					gaudi2_irq_name(i),
4133  					(void *) &hdev->user_interrupt[dec->core_id]);
4134  		}
4135  
4136  		if (rc) {
4137  			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4138  			goto free_dec_irqs;
4139  		}
4140  	}
4141  
4142  	return 0;
4143  
4144  free_dec_irqs:
4145  	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4146  	return rc;
4147  }
4148  
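/*
 * All GAUDI2_MSIX_ENTRIES vectors are allocated in one shot, and the handlers
 * are then requested in a fixed order: CS-completion CQ, event queue, decoder
 * interrupts, TPC assert, unexpected error and finally the user interrupts.
 * The error labels at the bottom unwind the registrations in reverse order
 * before releasing the vectors.
 */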
4149  static int gaudi2_enable_msix(struct hl_device *hdev)
4150  {
4151  	struct asic_fixed_properties *prop = &hdev->asic_prop;
4152  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4153  	int rc, irq, i, j, user_irq_init_cnt;
4154  	struct hl_cq *cq;
4155  
4156  	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4157  		return 0;
4158  
4159  	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4160  					PCI_IRQ_MSIX);
4161  	if (rc < 0) {
4162  		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4163  			GAUDI2_MSIX_ENTRIES, rc);
4164  		return rc;
4165  	}
4166  
4167  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4168  	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4169  	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4170  	if (rc) {
4171  		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4172  		goto free_irq_vectors;
4173  	}
4174  
4175  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4176  	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4177  			&hdev->event_queue);
4178  	if (rc) {
4179  		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4180  		goto free_completion_irq;
4181  	}
4182  
4183  	rc = gaudi2_dec_enable_msix(hdev);
4184  	if (rc) {
4185  		dev_err(hdev->dev, "Failed to enable decoder IRQ");
4186  		goto free_event_irq;
4187  	}
4188  
4189  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4190  	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4191  			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4192  			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4193  	if (rc) {
4194  		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4195  		goto free_dec_irq;
4196  	}
4197  
4198  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4199  	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4200  			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4201  					&hdev->unexpected_error_interrupt);
4202  	if (rc) {
4203  		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4204  		goto free_tpc_irq;
4205  	}
4206  
4207  	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4208  			user_irq_init_cnt < prop->user_interrupt_count;
4209  			i++, j++, user_irq_init_cnt++) {
4210  
4211  		irq = pci_irq_vector(hdev->pdev, i);
4212  		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4213  						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4214  						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4215  
4216  		if (rc) {
4217  			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4218  			goto free_user_irq;
4219  		}
4220  	}
4221  
4222  	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4223  
4224  	return 0;
4225  
4226  free_user_irq:
4227  	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4228  			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4229  
4230  		irq = pci_irq_vector(hdev->pdev, i);
4231  		free_irq(irq, &hdev->user_interrupt[j]);
4232  	}
4233  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4234  	free_irq(irq, &hdev->unexpected_error_interrupt);
4235  free_tpc_irq:
4236  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4237  	free_irq(irq, &hdev->tpc_interrupt);
4238  free_dec_irq:
4239  	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4240  free_event_irq:
4241  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4242  	free_irq(irq, &hdev->event_queue);
4243  
4244  free_completion_irq:
4245  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4246  	free_irq(irq, cq);
4247  
4248  free_irq_vectors:
4249  	pci_free_irq_vectors(hdev->pdev);
4250  
4251  	return rc;
4252  }
4253  
4254  static void gaudi2_sync_irqs(struct hl_device *hdev)
4255  {
4256  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4257  	int i, j;
4258  	int irq;
4259  
4260  	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4261  		return;
4262  
4263  	/* Wait for all pending IRQs to be finished */
4264  	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4265  
4266  	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4267  		irq = pci_irq_vector(hdev->pdev, i);
4268  		synchronize_irq(irq);
4269  	}
4270  
4271  	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4272  	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4273  
4274  	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4275  										i++, j++) {
4276  		irq = pci_irq_vector(hdev->pdev, i);
4277  		synchronize_irq(irq);
4278  	}
4279  
4280  	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4281  }
4282  
4283  static void gaudi2_disable_msix(struct hl_device *hdev)
4284  {
4285  	struct asic_fixed_properties *prop = &hdev->asic_prop;
4286  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4287  	struct hl_cq *cq;
4288  	int irq, i, j, k;
4289  
4290  	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4291  		return;
4292  
4293  	gaudi2_sync_irqs(hdev);
4294  
4295  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4296  	free_irq(irq, &hdev->event_queue);
4297  
4298  	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4299  
4300  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4301  	free_irq(irq, &hdev->tpc_interrupt);
4302  
4303  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4304  	free_irq(irq, &hdev->unexpected_error_interrupt);
4305  
4306  	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4307  			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4308  
4309  		irq = pci_irq_vector(hdev->pdev, i);
4310  		free_irq(irq, &hdev->user_interrupt[j]);
4311  	}
4312  
4313  	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4314  	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4315  	free_irq(irq, cq);
4316  
4317  	pci_free_irq_vectors(hdev->pdev);
4318  
4319  	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4320  }
4321  
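/*
 * Decoder stop is done gracefully: for each enabled decoder in the dcore the
 * GRACEFUL stop bit is set and the same register is polled (every 100us, up to
 * the VDEC timeout) until the GRACEFUL_PEND bit is set, i.e. until traffic
 * from the decoder has stopped and a core reset can safely be applied.
 */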
4322  static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4323  {
4324  	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4325  	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4326  	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4327  	int rc;
4328  
4329  	if (hdev->pldm)
4330  		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4331  	else
4332  		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4333  
4334  	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4335  		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4336  		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4337  			continue;
4338  
4339  		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4340  
4341  		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4342  
4343  		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4344  
4345  		/* Wait till all traffic from the decoder stops
4346  		 * before applying core reset.
4347  		 */
4348  		rc = hl_poll_timeout(
4349  				hdev,
4350  				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4351  				graceful,
4352  				(graceful & graceful_pend_mask),
4353  				100,
4354  				timeout_usec);
4355  		if (rc)
4356  			dev_err(hdev->dev,
4357  				"Failed to stop traffic from DCORE%d Decoder %d\n",
4358  				dcore_id, dec_id);
4359  	}
4360  }
4361  
4362  static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4363  {
4364  	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4365  	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4366  	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4367  	int rc;
4368  
4369  	if (hdev->pldm)
4370  		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4371  	else
4372  		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4373  
4374  	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4375  		dec_bit = PCIE_DEC_SHIFT + dec_id;
4376  		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4377  			continue;
4378  
4379  		offset = dec_id * PCIE_VDEC_OFFSET;
4380  
4381  		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4382  
4383  		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4384  
4385  		/* Wait till all traffic from the decoder stops
4386  		 * before applying core reset.
4387  		 */
4388  		rc = hl_poll_timeout(
4389  				hdev,
4390  				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4391  				graceful,
4392  				(graceful & graceful_pend_mask),
4393  				100,
4394  				timeout_usec);
4395  		if (rc)
4396  			dev_err(hdev->dev,
4397  				"Failed to stop traffic from PCIe Decoder %d\n",
4398  				dec_id);
4399  	}
4400  }
4401  
4402  static void gaudi2_stop_dec(struct hl_device *hdev)
4403  {
4404  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4405  	int dcore_id;
4406  
4407  	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4408  		return;
4409  
4410  	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4411  		gaudi2_stop_dcore_dec(hdev, dcore_id);
4412  
4413  	gaudi2_stop_pcie_dec(hdev);
4414  }
4415  
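/*
 * RUN/HALT requests toward an ARC are written to its AUX RUN_HALT_REQ
 * register; the matching ack is polled separately. A rough sketch of the
 * request/verify pairing, as used by gaudi2_set_engine_cores() below:
 *
 *	gaudi2_set_arc_running_mode(hdev, cpu_id, HL_ENGINE_CORE_HALT);
 *	rc = gaudi2_verify_arc_running_mode(hdev, cpu_id, HL_ENGINE_CORE_HALT);
 */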
4416  static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4417  {
4418  	u32 reg_base, reg_val;
4419  
4420  	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4421  	if (run_mode == HL_ENGINE_CORE_RUN)
4422  		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4423  	else
4424  		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4425  
4426  	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4427  }
4428  
4429  static void gaudi2_halt_arcs(struct hl_device *hdev)
4430  {
4431  	u16 arc_id;
4432  
4433  	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4434  		if (gaudi2_is_arc_enabled(hdev, arc_id))
4435  			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4436  	}
4437  }
4438  
4439  static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4440  {
4441  	int rc;
4442  	u32 reg_base, val, ack_mask, timeout_usec = 100000;
4443  
4444  	if (hdev->pldm)
4445  		timeout_usec *= 100;
4446  
4447  	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4448  	if (run_mode == HL_ENGINE_CORE_RUN)
4449  		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4450  	else
4451  		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4452  
4453  	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4454  				val, ((val & ack_mask) == ack_mask),
4455  				1000, timeout_usec);
4456  
4457  	if (!rc) {
4458  		/* Clear */
4459  		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4460  		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4461  	}
4462  
4463  	return rc;
4464  }
4465  
4466  static void gaudi2_reset_arcs(struct hl_device *hdev)
4467  {
4468  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4469  	u16 arc_id;
4470  
4471  	if (!gaudi2)
4472  		return;
4473  
4474  	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4475  		if (gaudi2_is_arc_enabled(hdev, arc_id))
4476  			gaudi2_clr_arc_id_cap(hdev, arc_id);
4477  }
4478  
4479  static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4480  {
4481  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4482  	u32 queue_id;
4483  	int i;
4484  
4485  	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4486  		return;
4487  
4488  	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4489  
4490  	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4491  		if (!(hdev->nic_ports_mask & BIT(i)))
4492  			continue;
4493  
4494  		gaudi2_qman_manual_flush_common(hdev, queue_id);
4495  	}
4496  }
4497  
4498  static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4499  					u32 num_cores, u32 core_command)
4500  {
4501  	int i, rc;
4502  
4503  	for (i = 0 ; i < num_cores ; i++) {
4504  		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4505  			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4506  	}
4507  
4508  	for (i = 0 ; i < num_cores ; i++) {
4509  		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4510  			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4511  
4512  			if (rc) {
4513  				dev_err(hdev->dev, "failed to %s arc: %d\n",
4514  					(core_command == HL_ENGINE_CORE_HALT) ?
4515  					"HALT" : "RUN", core_ids[i]);
4516  				return -1;
4517  			}
4518  		}
4519  	}
4520  
4521  	return 0;
4522  }
4523  
4524  static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4525  {
4526  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4527  	u32 reg_base, reg_addr, reg_val, tpc_id;
4528  
4529  	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4530  		return 0;
4531  
4532  	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4533  	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4534  		return 0;
4535  
4536  	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4537  	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4538  	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4539  			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4540  	WREG32(reg_addr, reg_val);
4541  
4542  	if (engine_command == HL_ENGINE_RESUME) {
4543  		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4544  		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4545  		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4546  	}
4547  
4548  	return 0;
4549  }
4550  
4551  static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4552  {
4553  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4554  	u32 reg_base, reg_addr, reg_val, mme_id;
4555  
4556  	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4557  	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4558  		return 0;
4559  
4560  	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4561  	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4562  	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4563  			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4564  	WREG32(reg_addr, reg_val);
4565  
4566  	return 0;
4567  }
4568  
4569  static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4570  {
4571  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4572  	u32 reg_base, reg_addr, reg_val, edma_id;
4573  
4574  	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4575  		return 0;
4576  
4577  	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4578  	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4579  		return 0;
4580  
4581  	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4582  	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4583  	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4584  			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4585  	WREG32(reg_addr, reg_val);
4586  
4587  	if (engine_command == HL_ENGINE_STALL) {
4588  		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4589  				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4590  		WREG32(reg_addr, reg_val);
4591  	}
4592  
4593  	return 0;
4594  }
4595  
4596  static int gaudi2_set_engine_modes(struct hl_device *hdev,
4597  		u32 *engine_ids, u32 num_engines, u32 engine_command)
4598  {
4599  	int i, rc;
4600  
4601  	for (i = 0 ; i < num_engines ; ++i) {
4602  		switch (engine_ids[i]) {
4603  		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4604  		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4605  		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4606  		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4607  			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4608  			if (rc)
4609  				return rc;
4610  
4611  			break;
4612  		case GAUDI2_DCORE0_ENGINE_ID_MME:
4613  		case GAUDI2_DCORE1_ENGINE_ID_MME:
4614  		case GAUDI2_DCORE2_ENGINE_ID_MME:
4615  		case GAUDI2_DCORE3_ENGINE_ID_MME:
4616  			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4617  			if (rc)
4618  				return rc;
4619  
4620  			break;
4621  		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4622  		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4623  		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4624  		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4625  			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4626  			if (rc)
4627  				return rc;
4628  
4629  			break;
4630  		default:
4631  			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4632  			return -EINVAL;
4633  		}
4634  	}
4635  
4636  	return 0;
4637  }
4638  
4639  static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4640  					u32 num_engines, u32 engine_command)
4641  {
4642  	switch (engine_command) {
4643  	case HL_ENGINE_CORE_HALT:
4644  	case HL_ENGINE_CORE_RUN:
4645  		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4646  
4647  	case HL_ENGINE_STALL:
4648  	case HL_ENGINE_RESUME:
4649  		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4650  
4651  	default:
4652  		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4653  		return -EINVAL;
4654  	}
4655  }
4656  
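/*
 * Engine halt ordering (skipped entirely when the reset is driven by FW):
 * stop all QMANs, wait, halt the ARCs and stall the compute/DMA engines,
 * wait again, stop the decoders, manually flush the NIC QMANs on soft reset,
 * and only then disable the QMANs and the timestamp counter. On hard reset
 * MSI-X is torn down as well; on soft reset the IRQs are merely synchronized.
 */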
4657  static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4658  {
4659  	u32 wait_timeout_ms;
4660  
4661  	if (hdev->pldm)
4662  		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4663  	else
4664  		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4665  
4666  	if (fw_reset)
4667  		goto skip_engines;
4668  
4669  	gaudi2_stop_dma_qmans(hdev);
4670  	gaudi2_stop_mme_qmans(hdev);
4671  	gaudi2_stop_tpc_qmans(hdev);
4672  	gaudi2_stop_rot_qmans(hdev);
4673  	gaudi2_stop_nic_qmans(hdev);
4674  	msleep(wait_timeout_ms);
4675  
4676  	gaudi2_halt_arcs(hdev);
4677  	gaudi2_dma_stall(hdev);
4678  	gaudi2_mme_stall(hdev);
4679  	gaudi2_tpc_stall(hdev);
4680  	gaudi2_rotator_stall(hdev);
4681  
4682  	msleep(wait_timeout_ms);
4683  
4684  	gaudi2_stop_dec(hdev);
4685  
4686  	/*
4687  	 * in case of soft reset, do a manual flush for the QMANs (currently done
4688  	 * only for the NIC QMANs)
4689  	 */
4690  	if (!hard_reset)
4691  		gaudi2_nic_qmans_manual_flush(hdev);
4692  
4693  	gaudi2_disable_dma_qmans(hdev);
4694  	gaudi2_disable_mme_qmans(hdev);
4695  	gaudi2_disable_tpc_qmans(hdev);
4696  	gaudi2_disable_rot_qmans(hdev);
4697  	gaudi2_disable_nic_qmans(hdev);
4698  	gaudi2_disable_timestamp(hdev);
4699  
4700  skip_engines:
4701  	if (hard_reset) {
4702  		gaudi2_disable_msix(hdev);
4703  		return;
4704  	}
4705  
4706  	gaudi2_sync_irqs(hdev);
4707  }
4708  
4709  static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4710  {
4711  	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4712  
4713  	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4714  	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4715  	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4716  	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4717  	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4718  	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4719  }
4720  
4721  static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4722  {
4723  	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4724  	struct dynamic_fw_load_mgr *dynamic_loader;
4725  	struct cpu_dyn_regs *dyn_regs;
4726  
4727  	/* fill common fields */
4728  	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4729  	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4730  	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4731  	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4732  	fw_loader->skip_bmc = false;
4733  	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4734  	fw_loader->dram_bar_id = DRAM_BAR_ID;
4735  	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4736  
4737  	/* here we update initial values for a few specific dynamic regs (as
4738  	 * before reading the first descriptor from FW those values have to be
4739  	 * hard-coded). In later stages of the protocol those values will be
4740  	 * updated automatically by reading the FW descriptor, so the data there
4741  	 * will always be up-to-date
4742  	 */
4743  	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4744  	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4745  	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4746  	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4747  	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4748  }
4749  
4750  static int gaudi2_init_cpu(struct hl_device *hdev)
4751  {
4752  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4753  	int rc;
4754  
4755  	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4756  		return 0;
4757  
4758  	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4759  		return 0;
4760  
4761  	rc = hl_fw_init_cpu(hdev);
4762  	if (rc)
4763  		return rc;
4764  
4765  	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4766  
4767  	return 0;
4768  }
4769  
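/*
 * CPU queues handshake: the PQ/EQ/CQ base addresses and sizes are written to
 * the CPU_IF registers, PQ_INIT_STATUS_READY_FOR_CP is set, the ARC is kicked
 * via the GIC host-PI-update interrupt, and the driver then polls
 * mmCPU_IF_QUEUE_INIT for PQ_INIT_STATUS_READY_FOR_HOST within cpu_timeout.
 */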
4770  static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4771  {
4772  	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4773  	struct asic_fixed_properties *prop = &hdev->asic_prop;
4774  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4775  	struct cpu_dyn_regs *dyn_regs;
4776  	struct hl_eq *eq;
4777  	u32 status;
4778  	int err;
4779  
4780  	if (!hdev->cpu_queues_enable)
4781  		return 0;
4782  
4783  	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4784  		return 0;
4785  
4786  	eq = &hdev->event_queue;
4787  
4788  	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4789  
4790  	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4791  	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4792  
4793  	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4794  	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4795  
4796  	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4797  	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4798  
4799  	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4800  	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4801  	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4802  
4803  	/* Used for EQ CI */
4804  	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4805  
4806  	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4807  
4808  	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4809  
4810  	/* Let the ARC know we are ready as it is now handling those queues  */
4811  
4812  	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4813  		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4814  
4815  	err = hl_poll_timeout(
4816  		hdev,
4817  		mmCPU_IF_QUEUE_INIT,
4818  		status,
4819  		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4820  		1000,
4821  		cpu_timeout);
4822  
4823  	if (err) {
4824  		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4825  		return -EIO;
4826  	}
4827  
4828  	/* update FW application security bits */
4829  	if (prop->fw_cpu_boot_dev_sts0_valid)
4830  		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4831  
4832  	if (prop->fw_cpu_boot_dev_sts1_valid)
4833  		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4834  
4835  	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4836  	return 0;
4837  }
4838  
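/*
 * Per-QMAN PQ setup: each of the NUM_OF_PQ_PER_QMAN queues is programmed with
 * the host bus address of its kernel queue, a log2-encoded length, and zeroed
 * PI/CI. All per-PQ registers below use a 4-byte stride per queue.
 */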
4839  static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4840  				u32 queue_id_base)
4841  {
4842  	struct hl_hw_queue *q;
4843  	u32 pq_id, pq_offset;
4844  
4845  	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4846  		q = &hdev->kernel_queues[queue_id_base + pq_id];
4847  		pq_offset = pq_id * 4;
4848  
4849  		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4850  				lower_32_bits(q->bus_address));
4851  		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4852  				upper_32_bits(q->bus_address));
4853  		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4854  		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4855  		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4856  	}
4857  }
4858  
4859  static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4860  {
4861  	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4862  
4863  	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4864  	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4865  	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4866  	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4867  
4868  	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4869  		cp_offset = cp_id * 4;
4870  
4871  		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4872  		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4873  		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4874  		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4875  	}
4876  
4877  	/* allow QMANs to accept work from ARC CQF */
4878  	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4879  }
4880  
4881  static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4882  				u32 queue_id_base)
4883  {
4884  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4885  	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4886  
4887  	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4888  	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4889  
4890  	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4891  		pq_offset = pq_id * 4;
4892  
4893  		/* Configure QMAN HBW to scratchpad as it is not needed */
4894  		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4895  				lower_32_bits(gaudi2->scratchpad_bus_address));
4896  		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4897  				upper_32_bits(gaudi2->scratchpad_bus_address));
4898  		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4899  				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4900  
4901  		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4902  		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4903  		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4904  		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4905  	}
4906  
4907  	/* Enable QMAN H/W completion */
4908  	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4909  }
4910  
4911  static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4912  {
4913  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4914  	u32 sp_reg_addr;
4915  
4916  	switch (queue_id_base) {
4917  	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4918  		fallthrough;
4919  	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4920  		fallthrough;
4921  	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4922  		fallthrough;
4923  	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4924  		fallthrough;
4925  	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4926  		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4927  		break;
4928  	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4929  		fallthrough;
4930  	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4931  		fallthrough;
4932  	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4933  		fallthrough;
4934  	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4935  		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4936  		break;
4937  	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4938  		fallthrough;
4939  	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4940  		fallthrough;
4941  	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4942  		fallthrough;
4943  	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4944  		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4945  		break;
4946  	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4947  		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4948  		break;
4949  	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4950  		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4951  		break;
4952  	default:
4953  		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4954  		return 0;
4955  	}
4956  
4957  	return sp_reg_addr;
4958  }
4959  
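/*
 * QMAN error reporting is routed back to FW: GLBL_ERR_ADDR is pointed at the
 * GIC irq-ctrl register matching the engine class (as resolved by
 * gaudi2_get_dyn_sp_reg()), and GLBL_ERR_WDATA carries the cpu_id of the
 * corresponding async event from gaudi2_irq_map_table.
 */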
4960  static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4961  					u32 queue_id_base)
4962  {
4963  	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4964  	int map_table_entry;
4965  
4966  	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4967  
4968  	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4969  	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4970  	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4971  
4972  	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4973  	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4974  		gaudi2_irq_map_table[map_table_entry].cpu_id);
4975  
4976  	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4977  
4978  	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4979  	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4980  	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4981  
4982  	/* Enable the QMAN channel.
4983  	 * PDMA QMAN configuration is different, as we do not allow the user to
4984  	 * access some of the CPs.
4985  	 * PDMA0: CP2/3 are reserved for ARC usage.
4986  	 * PDMA1: CP1/2/3 are reserved for ARC usage.
4987  	 */
4988  	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4989  		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4990  	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4991  		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4992  	else
4993  		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4994  }
4995  
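/*
 * Full bring-up of a single QMAN block: bind its PQs to the reserved
 * CS-completion CQ, then program the PQs, the CPs, the PQC and the common
 * (global) configuration. A minimal usage sketch, mirroring
 * gaudi2_init_pdma() below:
 *
 *	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
 *	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
 */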
4996  static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4997  		u32 queue_id_base)
4998  {
4999  	u32 pq_id;
5000  
5001  	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
5002  		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
5003  
5004  	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
5005  	gaudi2_init_qman_cp(hdev, reg_base);
5006  	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
5007  	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
5008  }
5009  
5010  static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5011  				u32 dma_core_id, bool is_secure)
5012  {
5013  	u32 prot, irq_handler_offset;
5014  	struct cpu_dyn_regs *dyn_regs;
5015  	int map_table_entry;
5016  
5017  	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5018  	if (is_secure)
5019  		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5020  
5021  	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5022  
5023  	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5024  	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5025  
5026  	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5027  			lower_32_bits(CFG_BASE + irq_handler_offset));
5028  
5029  	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5030  			upper_32_bits(CFG_BASE + irq_handler_offset));
5031  
5032  	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5033  	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5034  		gaudi2_irq_map_table[map_table_entry].cpu_id);
5035  
5036  	/* Enable the DMA channel */
5037  	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5038  }
5039  
5040  static void gaudi2_init_kdma(struct hl_device *hdev)
5041  {
5042  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5043  	u32 reg_base;
5044  
5045  	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5046  		return;
5047  
5048  	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5049  
5050  	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5051  
5052  	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5053  }
5054  
5055  static void gaudi2_init_pdma(struct hl_device *hdev)
5056  {
5057  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5058  	u32 reg_base;
5059  
5060  	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5061  		return;
5062  
5063  	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5064  	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5065  
5066  	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5067  	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5068  
5069  	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5070  	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5071  
5072  	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5073  	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5074  
5075  	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5076  }
5077  
5078  static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5079  {
5080  	u32 reg_base, base_edma_core_id, base_edma_qman_id;
5081  
5082  	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5083  	base_edma_qman_id = edma_stream_base[seq];
5084  
5085  	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5086  	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5087  
5088  	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5089  	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5090  }
5091  
5092  static void gaudi2_init_edma(struct hl_device *hdev)
5093  {
5094  	struct asic_fixed_properties *prop = &hdev->asic_prop;
5095  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5096  	int dcore, inst;
5097  
5098  	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5099  		return;
5100  
5101  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5102  		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5103  			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5104  
5105  			if (!(prop->edma_enabled_mask & BIT(seq)))
5106  				continue;
5107  
5108  			gaudi2_init_edma_instance(hdev, seq);
5109  
5110  			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5111  		}
5112  	}
5113  }
5114  
5115  /*
5116   * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5117   * @hdev: pointer to habanalabs device structure.
5118   * @sob_id: sync object ID.
5119   * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5120   * @interrupt_id: interrupt ID.
5121   *
5122   * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5123   * write directly to the HBW host memory of the virtual MSI-X doorbell.
5124   * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5125   *
5126   * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5127   * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5128   * completion, by decrementing the sync object value and re-arming the monitor.
5129   */
5130  static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5131  							u32 first_mon_id, u32 interrupt_id)
5132  {
5133  	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5134  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5135  	u64 addr;
5136  	u8 mask;
5137  
5138  	/* Reset the SOB value */
5139  	sob_offset = sob_id * sizeof(u32);
5140  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5141  
5142  	/* Configure 3 monitors:
5143  	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5144  	 * 2. Decrement SOB value by 1.
5145  	 * 3. Re-arm the master monitor.
5146  	 */
5147  
5148  	first_mon_offset = first_mon_id * sizeof(u32);
5149  
5150  	/* 2nd monitor: Decrement SOB value by 1 */
5151  	mon_offset = first_mon_offset + sizeof(u32);
5152  
5153  	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5154  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5155  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5156  
5157  	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5158  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5159  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5160  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5161  
5162  	/* 3rd monitor: Re-arm the master monitor */
5163  	mon_offset = first_mon_offset + 2 * sizeof(u32);
5164  
5165  	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5166  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5167  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5168  
5169  	sob_group = sob_id / 8;
5170  	mask = ~BIT(sob_id & 0x7);
5171  	mode = 0; /* comparison mode is "greater than or equal to" */
5172  	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5173  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5174  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5175  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5176  
5177  	payload = arm;
5178  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5179  
5180  	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5181  	mon_offset = first_mon_offset;
5182  
5183  	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5184  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5185  
5186  	addr = gaudi2->virt_msix_db_dma_addr;
5187  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5188  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5189  
5190  	payload = interrupt_id;
5191  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5192  
5193  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5194  }
5195  
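/*
 * For every enabled decoder, two SOB/monitor triples are armed: one for the
 * normal interrupt and one for the abnormal interrupt. The interrupt IDs are
 * consecutive (normal at 2 * decoder_id from GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM,
 * abnormal right after it) and the monitor IDs advance by 3 per decoder, i.e.
 * one master monitor plus its two helper messages.
 */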
5196  static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5197  {
5198  	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5199  	struct asic_fixed_properties *prop = &hdev->asic_prop;
5200  
5201  	/* Decoder normal/abnormal interrupts */
5202  	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5203  		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5204  			continue;
5205  
5206  		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5207  		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5208  		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5209  		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5210  
5211  		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5212  		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5213  		interrupt_id += 1;
5214  		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5215  	}
5216  }
5217  
5218  static void gaudi2_init_sm(struct hl_device *hdev)
5219  {
5220  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5221  	u64 cq_address;
5222  	u32 reg_val;
5223  	int i;
5224  
5225  	/* Enable HBW/LBW CQ for completion monitors */
5226  	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5227  	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5228  
5229  	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5230  		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5231  
5232  	/* Enable only HBW CQ for KDMA completion monitor */
5233  	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5234  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5235  
5236  	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5237  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5238  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5239  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5240  
5241  	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5242  		cq_address =
5243  			hdev->completion_queue[i].bus_address;
5244  
5245  		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5246  							lower_32_bits(cq_address));
5247  		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5248  							upper_32_bits(cq_address));
5249  		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5250  							ilog2(HL_CQ_SIZE_IN_BYTES));
5251  	}
5252  
5253  	/* Configure kernel ASID and MMU BP */
5254  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5255  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5256  
5257  	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5258  	gaudi2_prepare_sm_for_virt_msix_db(hdev);
5259  }
5260  
5261  static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5262  {
5263  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5264  	u32 reg_val;
5265  	int i;
5266  
5267  	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5268  	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5269  	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5270  	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5271  	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5272  	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5273  
5274  	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5275  	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5276  
5277  	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5278  		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5279  		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5280  	}
5281  }
5282  
5283  static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5284  							bool config_qman_only)
5285  {
5286  	u32 queue_id_base, reg_base;
5287  
5288  	switch (dcore_id) {
5289  	case 0:
5290  		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5291  		break;
5292  	case 1:
5293  		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5294  		break;
5295  	case 2:
5296  		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5297  		break;
5298  	case 3:
5299  		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5300  		break;
5301  	default:
5302  		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5303  		return;
5304  	}
5305  
5306  	if (!config_qman_only) {
5307  		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5308  		gaudi2_init_mme_acc(hdev, reg_base);
5309  	}
5310  
5311  	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5312  	gaudi2_init_qman(hdev, reg_base, queue_id_base);
5313  }
5314  
5315  static void gaudi2_init_mme(struct hl_device *hdev)
5316  {
5317  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5318  	int i;
5319  
5320  	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5321  		return;
5322  
5323  	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5324  		gaudi2_init_dcore_mme(hdev, i, false);
5325  
5326  		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5327  	}
5328  }
5329  
5330  static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5331  {
5332  	/* Mask arithmetic and QM interrupts in TPC */
5333  	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5334  
5335  	/* Set 16 cache lines */
5336  	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5337  			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5338  }
5339  
5340  struct gaudi2_tpc_init_cfg_data {
5341  	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5342  };
5343  
5344  static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5345  					u32 offset, struct iterate_module_ctx *ctx)
5346  {
5347  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5348  	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5349  	u32 queue_id_base;
5350  	u8 seq;
5351  
5352  	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5353  
5354  	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5355  		/* gets last sequence number */
5356  		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5357  	else
5358  		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5359  
5360  	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5361  	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5362  
5363  	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5364  }
5365  
5366  static void gaudi2_init_tpc(struct hl_device *hdev)
5367  {
5368  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5369  	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5370  	struct iterate_module_ctx tpc_iter;
5371  
5372  	if (!hdev->asic_prop.tpc_enabled_mask)
5373  		return;
5374  
5375  	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5376  		return;
5377  
5378  	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5379  	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5380  	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5381  	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5382  	tpc_iter.fn = &gaudi2_init_tpc_config;
5383  	tpc_iter.data = &init_cfg_data;
5384  	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5385  }
5386  
5387  static void gaudi2_init_rotator(struct hl_device *hdev)
5388  {
5389  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5390  	u32 i, reg_base, queue_id;
5391  
5392  	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5393  
5394  	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5395  		reg_base = gaudi2_qm_blocks_bases[queue_id];
5396  		gaudi2_init_qman(hdev, reg_base, queue_id);
5397  
5398  		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5399  	}
5400  }
5401  
5402  static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5403  {
5404  	u32 sob_id;
5405  
5406  	/* VCMD normal interrupt */
5407  	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5408  	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5409  			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5410  	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5411  
5412  	/* VCMD abnormal interrupt */
5413  	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5414  	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5415  			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5416  	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5417  }
5418  
5419  static void gaudi2_init_dec(struct hl_device *hdev)
5420  {
5421  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5422  	u32 dcore_id, dec_id, dec_bit;
5423  	u64 base_addr;
5424  
5425  	if (!hdev->asic_prop.decoder_enabled_mask)
5426  		return;
5427  
5428  	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5429  		return;
5430  
5431  	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5432  		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5433  			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5434  
5435  			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5436  				continue;
5437  
5438  			base_addr =  mmDCORE0_DEC0_CMD_BASE +
5439  					BRDG_CTRL_BLOCK_OFFSET +
5440  					dcore_id * DCORE_OFFSET +
5441  					dec_id * DCORE_VDEC_OFFSET;
5442  
5443  			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5444  
5445  			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5446  		}
5447  
5448  	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5449  		dec_bit = PCIE_DEC_SHIFT + dec_id;
5450  		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5451  			continue;
5452  
5453  		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5454  				dec_id * DCORE_VDEC_OFFSET;
5455  
5456  		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5457  
5458  		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5459  	}
5460  }
5461  
5462  static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5463  					u32 stlb_base, u32 asid, u64 phys_addr)
5464  {
5465  	u32 status, timeout_usec;
5466  	int rc;
5467  
5468  	if (hdev->pldm || !hdev->pdev)
5469  		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5470  	else
5471  		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5472  
5473  	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5474  	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5475  	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5476  	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5477  
5478  	rc = hl_poll_timeout(
5479  		hdev,
5480  		stlb_base + STLB_BUSY_OFFSET,
5481  		status,
5482  		!(status & 0x80000000),
5483  		1000,
5484  		timeout_usec);
5485  
5486  	if (rc) {
5487  		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5488  		return rc;
5489  	}
5490  
5491  	return 0;
5492  }
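
/*
 * Minimal illustrative sketch (not part of the driver): the hop0 physical
 * address programmed above is split between two STLB registers, using the
 * same shift macros as the function above (low bits 43:12 and high bits
 * 63:44, per the macro names).
 */
static inline void demo_split_hop0_pa(u64 phys_addr, u32 *pa_43_12, u32 *pa_63_44)
{
	*pa_43_12 = phys_addr >> MMU_HOP0_PA43_12_SHIFT;	/* written to STLB_HOP0_PA43_12 */
	*pa_63_44 = phys_addr >> MMU_HOP0_PA63_44_SHIFT;	/* written to STLB_HOP0_PA63_44 */
}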
5493  
5494  static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5495  					u32 start_offset, u32 inv_start_val,
5496  					u32 flags)
5497  {
5498  	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
5499  	if (flags & MMU_OP_CLEAR_MEMCACHE)
5500  		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5501  
5502  	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5503  		return;
5504  
5505  	WREG32(stlb_base + start_offset, inv_start_val);
5506  }
5507  
5508  static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5509  						struct gaudi2_cache_invld_params *inv_params)
5510  {
5511  	u32 status, timeout_usec, start_offset;
5512  	int rc;
5513  
5514  	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5515  					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5516  
5517  	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
5518  	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5519  		rc = hl_poll_timeout(
5520  			hdev,
5521  			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5522  			status,
5523  			status & 0x1,
5524  			1000,
5525  			timeout_usec);
5526  
5527  		if (rc)
5528  			return rc;
5529  
5530  		/* Need to manually reset the status to 0 */
5531  		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5532  	}
5533  
5534  	/* Lower cache does not work with cache lines, hence we can skip its
5535  	 * invalidation upon map and invalidate only upon unmap
5536  	 */
5537  	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5538  		return 0;
5539  
5540  	start_offset = inv_params->range_invalidation ?
5541  			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5542  
5543  	rc = hl_poll_timeout(
5544  		hdev,
5545  		stlb_base + start_offset,
5546  		status,
5547  		!(status & 0x1),
5548  		1000,
5549  		timeout_usec);
5550  
5551  	return rc;
5552  }
5553  
5554  bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5555  {
5556  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5557  	u32 hw_cap;
5558  
5559  	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5560  
5561  	if (gaudi2->hw_cap_initialized & hw_cap)
5562  		return true;
5563  
5564  	return false;
5565  }
5566  
5567  /* This function shall be called only for HMMUs whose capability bit is set */
5568  static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5569  {
5570  	u32 offset;
5571  
5572  	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5573  	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5574  }
5575  
5576  static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5577  						struct gaudi2_cache_invld_params *inv_params)
5578  {
5579  	u32 start_offset;
5580  
5581  	if (inv_params->range_invalidation) {
5582  		/* Set the address range.
5583  		 * Note: the start address we write to the register is, by design,
5584  		 * not included in the invalidation range.
5585  		 * That is why we program an address lower than the first address
5586  		 * we actually want to be invalidated.
5587  		 */
5588  		u64 start = inv_params->start_va - 1;
5589  
5590  		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5591  
5592  		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5593  				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5594  
5595  		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5596  				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5597  
5598  		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5599  				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5600  
5601  		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5602  				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5603  	} else {
5604  		start_offset = STLB_INV_ALL_START_OFFSET;
5605  	}
5606  
5607  	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5608  						inv_params->inv_start_val, inv_params->flags);
5609  }
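
/*
 * Minimal illustrative sketch (not part of the driver): how the range
 * invalidation registers programmed above are derived. The start address is
 * decremented because the H/W excludes the programmed start from the range,
 * while the end address is written unmodified, as in the code above.
 */
static inline void demo_range_inv_regs(u64 start_va, u64 end_va,
					u32 *start_lsb, u32 *start_msb,
					u32 *end_lsb, u32 *end_msb)
{
	u64 start = start_va - 1;	/* first VA actually invalidated is start_va */

	*start_lsb = start >> MMU_RANGE_INV_VA_LSB_SHIFT;
	*start_msb = start >> MMU_RANGE_INV_VA_MSB_SHIFT;
	*end_lsb = end_va >> MMU_RANGE_INV_VA_LSB_SHIFT;
	*end_msb = end_va >> MMU_RANGE_INV_VA_MSB_SHIFT;
}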
5610  
5611  static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5612  						int dcore_id, int hmmu_id,
5613  						struct gaudi2_cache_invld_params *inv_params)
5614  {
5615  	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5616  
5617  	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5618  }
5619  
5620  static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5621  						int dcore_id, int hmmu_id,
5622  						struct gaudi2_cache_invld_params *inv_params)
5623  {
5624  	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5625  
5626  	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5627  }
5628  
5629  static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5630  						struct gaudi2_cache_invld_params *inv_params)
5631  {
5632  	int dcore_id, hmmu_id;
5633  
5634  	/* first send all invalidation commands */
5635  	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5636  		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5637  			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5638  				continue;
5639  
5640  			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5641  		}
5642  	}
5643  
5644  	/* next, poll all invalidations status */
5645  	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5646  		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5647  			int rc;
5648  
5649  			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5650  				continue;
5651  
5652  			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5653  										inv_params);
5654  			if (rc)
5655  				return rc;
5656  		}
5657  	}
5658  
5659  	return 0;
5660  }
5661  
5662  static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5663  {
5664  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5665  	struct gaudi2_cache_invld_params invld_params;
5666  	int rc = 0;
5667  
5668  	if (hdev->reset_info.hard_reset_pending)
5669  		return rc;
5670  
5671  	invld_params.range_invalidation = false;
5672  	invld_params.inv_start_val = 1;
5673  
5674  	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5675  		invld_params.flags = flags;
5676  		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5677  		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5678  										&invld_params);
5679  	} else if (flags & MMU_OP_PHYS_PACK) {
5680  		invld_params.flags = 0;
5681  		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5682  	}
5683  
5684  	return rc;
5685  }
5686  
5687  static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5688  				u32 flags, u32 asid, u64 va, u64 size)
5689  {
5690  	struct gaudi2_cache_invld_params invld_params = {0};
5691  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5692  	u64 start_va, end_va;
5693  	u32 inv_start_val;
5694  	int rc = 0;
5695  
5696  	if (hdev->reset_info.hard_reset_pending)
5697  		return 0;
5698  
5699  	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5700  			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5701  			asid << MMU_RANGE_INV_ASID_SHIFT);
5702  	start_va = va;
5703  	end_va = start_va + size;
5704  
5705  	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5706  		/* As range invalidation does not support zero address we will
5707  		 * do full invalidation in this case
5708  		 */
5709  		if (start_va) {
5710  			invld_params.range_invalidation = true;
5711  			invld_params.start_va = start_va;
5712  			invld_params.end_va = end_va;
5713  			invld_params.inv_start_val = inv_start_val;
5714  			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5715  		} else {
5716  			invld_params.range_invalidation = false;
5717  			invld_params.inv_start_val = 1;
5718  			invld_params.flags = flags;
5719  		}
5720  
5721  
5722  		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5723  		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5724  										&invld_params);
5725  		if (rc)
5726  			return rc;
5727  
5728  	} else if (flags & MMU_OP_PHYS_PACK) {
5729  		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5730  		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5731  		invld_params.inv_start_val = inv_start_val;
5732  		invld_params.flags = flags;
5733  		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5734  	}
5735  
5736  	return rc;
5737  }
5738  
5739  static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5740  {
5741  	struct asic_fixed_properties *prop = &hdev->asic_prop;
5742  	u64 hop0_addr;
5743  	u32 asid, max_asid = prop->max_asid;
5744  	int rc;
5745  
5746  	/* it takes too much time to init all of the ASIDs on palladium */
5747  	if (hdev->pldm)
5748  		max_asid = min((u32) 8, max_asid);
5749  
5750  	for (asid = 0 ; asid < max_asid ; asid++) {
5751  		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5752  		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5753  		if (rc) {
5754  			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5755  			return rc;
5756  		}
5757  	}
5758  
5759  	return 0;
5760  }
5761  
5762  static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5763  {
5764  	u32 status, timeout_usec;
5765  	int rc;
5766  
5767  	if (hdev->pldm || !hdev->pdev)
5768  		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5769  	else
5770  		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5771  
5772  	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5773  
5774  	rc = hl_poll_timeout(
5775  		hdev,
5776  		stlb_base + STLB_SRAM_INIT_OFFSET,
5777  		status,
5778  		!status,
5779  		1000,
5780  		timeout_usec);
5781  
5782  	if (rc)
5783  		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5784  
5785  	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5786  	if (rc)
5787  		return rc;
5788  
5789  	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5790  
5791  	rc = hl_poll_timeout(
5792  		hdev,
5793  		stlb_base + STLB_INV_ALL_START_OFFSET,
5794  		status,
5795  		!status,
5796  		1000,
5797  		timeout_usec);
5798  
5799  	if (rc)
5800  		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5801  
5802  	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5803  
5804  	return rc;
5805  }
5806  
5807  static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5808  {
5809  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5810  	u32 mmu_base, stlb_base;
5811  	int rc;
5812  
5813  	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5814  		return 0;
5815  
5816  	mmu_base = mmPMMU_HBW_MMU_BASE;
5817  	stlb_base = mmPMMU_HBW_STLB_BASE;
5818  
5819  	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5820  		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5821  		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5822  		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5823  		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5824  		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5825  		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5826  		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5827  		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5828  		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5829  		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5830  
5831  	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5832  
5833  	if (PAGE_SIZE == SZ_64K) {
5834  		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5835  		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5836  			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5837  			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5838  			FIELD_PREP(
5839  				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5840  				1),
5841  			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5842  			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5843  			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5844  	}
5845  
5846  	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5847  
5848  	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5849  	if (rc)
5850  		return rc;
5851  
5852  	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5853  
5854  	return 0;
5855  }
5856  
5857  static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5858  				int hmmu_id)
5859  {
5860  	struct asic_fixed_properties *prop = &hdev->asic_prop;
5861  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5862  	u32 offset, mmu_base, stlb_base, hw_cap;
5863  	u8 dmmu_seq;
5864  	int rc;
5865  
5866  	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5867  	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5868  
5869  	/*
5870  	 * return if DMMU is already initialized or if it's not out of
5871  	 * isolation (due to cluster binning)
5872  	 */
5873  	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5874  		return 0;
5875  
5876  	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5877  	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5878  	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5879  
5880  	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5881  			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5882  
5883  	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5884  		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5885  		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5886  		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5887  		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5888  		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5889  			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5890  			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5891  			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5892  			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5893  			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5894  
5895  	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5896  			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5897  
5898  	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5899  
5900  	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5901  	if (rc)
5902  		return rc;
5903  
5904  	gaudi2->hw_cap_initialized |= hw_cap;
5905  
5906  	return 0;
5907  }
5908  
5909  static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5910  {
5911  	int rc, dcore_id, hmmu_id;
5912  
5913  	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5914  		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5915  			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5916  			if (rc)
5917  				return rc;
5918  		}
5919  
5920  	return 0;
5921  }
5922  
5923  static int gaudi2_mmu_init(struct hl_device *hdev)
5924  {
5925  	int rc;
5926  
5927  	rc = gaudi2_pci_mmu_init(hdev);
5928  	if (rc)
5929  		return rc;
5930  
5931  	rc = gaudi2_hbm_mmu_init(hdev);
5932  	if (rc)
5933  		return rc;
5934  
5935  	return 0;
5936  }
5937  
5938  static int gaudi2_hw_init(struct hl_device *hdev)
5939  {
5940  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5941  	int rc;
5942  
5943  	/* Let's mark in the H/W that we have reached this point. We check
5944  	 * this value in the reset_before_init function to understand whether
5945  	 * we need to reset the chip before doing H/W init. This register is
5946  	 * cleared by the H/W upon H/W reset
5947  	 */
5948  	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5949  
5950  	/* Perform read from the device to make sure device is up */
5951  	RREG32(mmHW_STATE);
5952  
5953  	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5954  	 * So we set it here and if anyone tries to move it later to
5955  	 * a different address, there will be an error
5956  	 */
5957  	if (hdev->asic_prop.iatu_done_by_fw)
5958  		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5959  
5960  	/*
5961  	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5962  	 * base address of dram
5963  	 */
5964  	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5965  		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5966  		return -EIO;
5967  	}
5968  
5969  	rc = gaudi2_init_cpu(hdev);
5970  	if (rc) {
5971  		dev_err(hdev->dev, "failed to initialize CPU\n");
5972  		return rc;
5973  	}
5974  
5975  	gaudi2_init_scrambler_hbm(hdev);
5976  	gaudi2_init_kdma(hdev);
5977  
5978  	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5979  	if (rc) {
5980  		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5981  		return rc;
5982  	}
5983  
5984  	rc = gaudi2->cpucp_info_get(hdev);
5985  	if (rc) {
5986  		dev_err(hdev->dev, "Failed to get cpucp info\n");
5987  		return rc;
5988  	}
5989  
5990  	rc = gaudi2_mmu_init(hdev);
5991  	if (rc)
5992  		return rc;
5993  
5994  	gaudi2_init_pdma(hdev);
5995  	gaudi2_init_edma(hdev);
5996  	gaudi2_init_sm(hdev);
5997  	gaudi2_init_tpc(hdev);
5998  	gaudi2_init_mme(hdev);
5999  	gaudi2_init_rotator(hdev);
6000  	gaudi2_init_dec(hdev);
6001  	gaudi2_enable_timestamp(hdev);
6002  
6003  	rc = gaudi2_coresight_init(hdev);
6004  	if (rc)
6005  		goto disable_queues;
6006  
6007  	rc = gaudi2_enable_msix(hdev);
6008  	if (rc)
6009  		goto disable_queues;
6010  
6011  	/* Perform read from the device to flush all configuration */
6012  	RREG32(mmHW_STATE);
6013  
6014  	return 0;
6015  
6016  disable_queues:
6017  	gaudi2_disable_dma_qmans(hdev);
6018  	gaudi2_disable_mme_qmans(hdev);
6019  	gaudi2_disable_tpc_qmans(hdev);
6020  	gaudi2_disable_rot_qmans(hdev);
6021  	gaudi2_disable_nic_qmans(hdev);
6022  
6023  	gaudi2_disable_timestamp(hdev);
6024  
6025  	return rc;
6026  }
6027  
6028  /**
6029   * gaudi2_send_hard_reset_cmd - common function to handle reset
6030   *
6031   * @hdev: pointer to the habanalabs device structure
6032   *
6033   * This function handles the various possible scenarios for reset.
6034   * It considers whether the reset is handled by the driver or the FW and which FW components are loaded
6035   */
6036  static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6037  {
6038  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6039  	bool heartbeat_reset, preboot_only, cpu_initialized = false;
6040  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6041  	u32 cpu_boot_status;
6042  
6043  	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6044  	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6045  
6046  	/*
6047  	 * Handle the corner case where the failure happened while loading the CPU
6048  	 * management app and the driver did not detect any failure while loading
6049  	 * the FW. In that scenario the driver will send only HALT_MACHINE, and no
6050  	 * one will respond to this request since the FW is already back in preboot
6051  	 * and cannot handle such a command.
6052  	 * In this case, the next time the management app loads it will check the
6053  	 * events register, which will still hold the halt indication, and will
6054  	 * reboot the device. The solution is to let preboot clear all relevant
6055  	 * registers before the next boot, once the driver sends COMMS_RST_DEV.
6056  	 */
6057  	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6058  
6059  	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6060  			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6061  		cpu_initialized = true;
6062  
6063  	/*
6064  	 * When Linux/Bootfit is present, this write to the SP can be interpreted in 2 ways:
6065  	 * 1. FW reset: FW initiates the reset sequence
6066  	 * 2. driver reset: FW will start HALT sequence (the preparations for the
6067  	 *                  reset but not the reset itself as it is not implemented
6068  	 *                  on their part) and LKD will wait to let FW complete the
6069  	 *                  sequence before issuing the reset
6070  	 */
6071  	if (!preboot_only && cpu_initialized) {
6072  		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6073  			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6074  
6075  		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6076  	}
6077  
6078  	/*
6079  	 * When working with preboot (without Linux/Boot fit) we can
6080  	 * communicate only using the COMMS commands to issue halt/reset.
6081  	 *
6082  	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6083  	 * attempt to revive the card in the small chance that the f/w has
6084  	 * experienced a watchdog event, which caused it to return to preboot.
6085  	 * In that case, triggering reset through GIC won't help. We need to
6086  	 * trigger the reset as if Linux wasn't loaded.
6087  	 *
6088  	 * We do it only if the reset cause was HB, because that would be the
6089  	 * indication of such an event.
6090  	 *
6091  	 * If the watchdog hasn't expired but we still got a HB failure, this
6092  	 * won't do any damage.
6093  	 */
6094  
6095  	if (heartbeat_reset || preboot_only || !cpu_initialized) {
6096  		if (hdev->asic_prop.hard_reset_done_by_fw)
6097  			hl_fw_ask_hard_reset_without_linux(hdev);
6098  		else
6099  			hl_fw_ask_halt_machine_without_linux(hdev);
6100  	}
6101  }
6102  
6103  /**
6104   * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6105   *
6106   * @hdev: pointer to the habanalabs device structure
6107   *
6108   * This function executes hard reset based on if driver/FW should do the reset
6109   */
6110  static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6111  {
6112  	if (hdev->asic_prop.hard_reset_done_by_fw) {
6113  		gaudi2_send_hard_reset_cmd(hdev);
6114  		return;
6115  	}
6116  
6117  	/* Set device to handle FLR by H/W as we will put the device
6118  	 * CPU to halt mode
6119  	 */
6120  	WREG32(mmPCIE_AUX_FLR_CTRL,
6121  			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6122  
6123  	gaudi2_send_hard_reset_cmd(hdev);
6124  
6125  	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6126  }
6127  
6128  static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6129  {
6130  	int i, rc = 0;
6131  	u32 reg_val;
6132  
6133  	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6134  		rc = hl_poll_timeout(
6135  			hdev,
6136  			mmCPU_RST_STATUS_TO_HOST,
6137  			reg_val,
6138  			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6139  			1000,
6140  			poll_timeout_us);
6141  
6142  	if (rc)
6143  		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6144  				reg_val);
6145  	return rc;
6146  }
6147  
6148  /**
6149   * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6150   *
6151   * @hdev: pointer to the habanalabs device structure
6152   * @driver_performs_reset: true if driver should perform reset instead of f/w.
6153   * @poll_timeout_us: time to wait for response from f/w.
6154   *
6155   * This function executes soft reset based on if driver/FW should do the reset
6156   */
6157  static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6158  						u32 poll_timeout_us)
6159  {
6160  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6161  	int rc = 0;
6162  
6163  	if (!driver_performs_reset) {
6164  		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
6165  			/* set SP to indicate reset request sent to FW */
6166  			if (dyn_regs->cpu_rst_status)
6167  				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6168  			else
6169  				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6170  			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6171  				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6172  
6173  			/* wait for f/w response */
6174  			rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6175  		} else {
6176  			rc = hl_fw_send_soft_reset(hdev);
6177  		}
6178  		return rc;
6179  	}
6180  
6181  	/* Block access to engines, QMANs and SM during reset; these
6182  	 * RRs will be reconfigured after the soft reset.
6183  	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6184  	 */
6185  	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6186  					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6187  
6188  	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6189  				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6190  				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6191  
6192  	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6193  	return 0;
6194  }
6195  
6196  static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6197  {
6198  	int i, rc = 0;
6199  	u32 reg_val;
6200  
6201  	/* We poll the BTM done indication multiple times after reset due to
6202  	 * a HW errata 'GAUDI2_0300'
6203  	 */
6204  	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6205  		rc = hl_poll_timeout(
6206  			hdev,
6207  			mmPSOC_GLOBAL_CONF_BTM_FSM,
6208  			reg_val,
6209  			reg_val == 0,
6210  			1000,
6211  			poll_timeout_us);
6212  
6213  	if (rc)
6214  		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6215  }
6216  
6217  static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6218  {
6219  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6220  	u32 poll_timeout_us, reset_sleep_ms;
6221  	bool driver_performs_reset = false;
6222  	int rc;
6223  
6224  	if (hdev->pldm) {
6225  		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6226  						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6227  		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6228  	} else {
6229  		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6230  		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6231  	}
6232  
6233  	if (fw_reset)
6234  		goto skip_reset;
6235  
6236  	gaudi2_reset_arcs(hdev);
6237  
6238  	if (hard_reset) {
6239  		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6240  		gaudi2_execute_hard_reset(hdev);
6241  	} else {
6242  		/*
6243  		 * As we also have to support working with preboot only (which does not
6244  		 * support soft reset), we have to make sure that security is disabled
6245  		 * before letting the driver do the reset. The user shall control the BFE
6246  		 * flags to avoid requesting a soft reset on a secured device with preboot only.
6247  		 */
6248  		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6249  							!hdev->asic_prop.fw_security_enabled);
6250  		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6251  		if (rc)
6252  			return rc;
6253  	}
6254  
6255  skip_reset:
6256  	if (driver_performs_reset || hard_reset) {
6257  		/*
6258  		 * Instead of waiting for BTM indication we should wait for preboot ready:
6259  		 * Consider the below scenario:
6260  		 * 1. FW update is being triggered
6261  		 *        - setting the dirty bit
6262  		 * 2. hard reset will be triggered due to the dirty bit
6263  		 * 3. FW initiates the reset:
6264  		 *        - dirty bit cleared
6265  		 *        - BTM indication cleared
6266  		 *        - preboot ready indication cleared
6267  		 * 4. during hard reset:
6268  		 *        - BTM indication will be set
6269  		 *        - BIST test performed and another reset triggered
6270  		 * 5. only after this reset the preboot will set the preboot ready
6271  		 *
6272  		 * When polling on the BTM indication alone, we can lose sync with the FW
6273  		 * while trying to communicate with a FW that is in the middle of reset.
6274  		 * To overcome this we always wait for the preboot ready indication.
6275  		 */
6276  
6277  		/* without this sleep reset will not work */
6278  		msleep(reset_sleep_ms);
6279  
6280  		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6281  			hl_fw_wait_preboot_ready(hdev);
6282  		else
6283  			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6284  	}
6285  
6286  	if (!gaudi2)
6287  		return 0;
6288  
6289  	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6290  	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6291  
6292  	/*
6293  	 * Clear NIC capability mask in order for driver to re-configure
6294  	 * NIC QMANs. NIC ports will not be re-configured during soft
6295  	 * reset as we call gaudi2_nic_init only during hard reset
6296  	 */
6297  	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6298  
6299  	if (hard_reset) {
6300  		gaudi2->hw_cap_initialized &=
6301  			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6302  			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6303  			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6304  			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6305  			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6306  
6307  		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6308  	} else {
6309  		gaudi2->hw_cap_initialized &=
6310  			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6311  			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6312  			HW_CAP_ROT_MASK);
6313  	}
6314  	return 0;
6315  }
6316  
6317  static int gaudi2_suspend(struct hl_device *hdev)
6318  {
6319  	int rc;
6320  
6321  	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6322  	if (rc)
6323  		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6324  
6325  	return rc;
6326  }
6327  
6328  static int gaudi2_resume(struct hl_device *hdev)
6329  {
6330  	return gaudi2_init_iatu(hdev);
6331  }
6332  
6333  static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6334  		void *cpu_addr, dma_addr_t dma_addr, size_t size)
6335  {
6336  	int rc;
6337  
6338  	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6339  			VM_DONTCOPY | VM_NORESERVE);
6340  
6341  #ifdef _HAS_DMA_MMAP_COHERENT
6342  
6343  	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6344  	if (rc)
6345  		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6346  
6347  #else
6348  
6349  	rc = remap_pfn_range(vma, vma->vm_start,
6350  				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6351  				size, vma->vm_page_prot);
6352  	if (rc)
6353  		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6354  
6355  #endif
6356  
6357  	return rc;
6358  }
6359  
6360  static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6361  {
6362  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6363  	u64 hw_cap_mask = 0;
6364  	u64 hw_tpc_cap_bit = 0;
6365  	u64 hw_nic_cap_bit = 0;
6366  	u64 hw_test_cap_bit = 0;
6367  
6368  	switch (hw_queue_id) {
6369  	case GAUDI2_QUEUE_ID_PDMA_0_0:
6370  	case GAUDI2_QUEUE_ID_PDMA_0_1:
6371  	case GAUDI2_QUEUE_ID_PDMA_1_0:
6372  		hw_cap_mask = HW_CAP_PDMA_MASK;
6373  		break;
6374  	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6375  		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6376  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6377  		break;
6378  	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6379  		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6380  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6381  		break;
6382  	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6383  		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6384  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6385  		break;
6386  	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6387  		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6388  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6389  		break;
6390  
6391  	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6392  		hw_test_cap_bit = HW_CAP_MME_SHIFT;
6393  		break;
6394  
6395  	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6396  		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6397  		break;
6398  
6399  	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6400  		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6401  		break;
6402  
6403  	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6404  		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6405  		break;
6406  
6407  	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6408  		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6409  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6410  
6411  		/* special case where cap bit refers to the first queue id */
6412  		if (!hw_tpc_cap_bit)
6413  			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6414  		break;
6415  
6416  	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6417  		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6418  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6419  		break;
6420  
6421  	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6422  		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6423  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6424  		break;
6425  
6426  	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6427  		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6428  			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6429  		break;
6430  
6431  	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6432  		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6433  		break;
6434  
6435  	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6436  		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6437  		break;
6438  
6439  	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6440  		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6441  
6442  		/* special case where cap bit refers to the first queue id */
6443  		if (!hw_nic_cap_bit)
6444  			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6445  		break;
6446  
6447  	case GAUDI2_QUEUE_ID_CPU_PQ:
6448  		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6449  
6450  	default:
6451  		return false;
6452  	}
6453  
6454  	if (hw_tpc_cap_bit)
6455  		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6456  
6457  	if (hw_nic_cap_bit)
6458  		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6459  
6460  	if (hw_test_cap_bit)
6461  		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6462  
6463  	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6464  }
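
/*
 * Minimal illustrative sketch (not part of the driver): queue IDs map to
 * per-engine capability bits by dropping the two low bits (each QMAN exposes
 * 4 PQs), relative to the first queue ID of that engine type, as done for the
 * DCORE0 EDMA case in the switch above.
 */
static inline u64 demo_dcore0_edma_cap_bit(u32 hw_queue_id)
{
	return HW_CAP_EDMA_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
}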
6465  
6466  static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6467  {
6468  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6469  
6470  	switch (arc_id) {
6471  	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6472  	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6473  		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6474  
6475  	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6476  		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6477  
6478  	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6479  		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6480  
6481  	default:
6482  		return false;
6483  	}
6484  }
6485  
6486  static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6487  {
6488  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6489  
6490  	switch (arc_id) {
6491  	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6492  	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6493  		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6494  		break;
6495  
6496  	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6497  		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6498  		break;
6499  
6500  	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6501  		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6502  		break;
6503  
6504  	default:
6505  		return;
6506  	}
6507  }
6508  
6509  static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6510  {
6511  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6512  
6513  	switch (arc_id) {
6514  	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6515  	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6516  		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6517  		break;
6518  
6519  	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6520  		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6521  		break;
6522  
6523  	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6524  		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6525  		break;
6526  
6527  	default:
6528  		return;
6529  	}
6530  }
6531  
6532  static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6533  {
6534  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6535  	u32 pq_offset, reg_base, db_reg_offset, db_value;
6536  
6537  	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6538  		/*
6539  		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6540  		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6541  		 * number.
6542  		 */
6543  		pq_offset = (hw_queue_id & 0x3) * 4;
6544  		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6545  		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6546  	} else {
6547  		db_reg_offset = mmCPU_IF_PF_PQ_PI;
6548  	}
6549  
6550  	db_value = pi;
6551  
6552  	/* ring the doorbell */
6553  	WREG32(db_reg_offset, db_value);
6554  
6555  	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6556  		/* make sure device CPU will read latest data from host */
6557  		mb();
6558  		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6559  			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6560  	}
6561  }
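
/*
 * Minimal illustrative sketch (not part of the driver): how the doorbell
 * register is selected above for a non-CPU queue. A QMAN exposes four
 * successive 32-bit PQ_PI registers, so the two low bits of the H/W queue ID
 * select the PQ within its QMAN.
 */
static inline u32 demo_pq_pi_reg(u32 qman_reg_base, u32 hw_queue_id)
{
	u32 pq_offset = (hw_queue_id & 0x3) * 4;	/* 4 bytes per PQ_PI register */

	return qman_reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
}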
6562  
6563  static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6564  {
6565  	__le64 *pbd = (__le64 *) bd;
6566  
6567  	/* The QMANs are on the host memory so a simple copy suffices */
6568  	pqe[0] = pbd[0];
6569  	pqe[1] = pbd[1];
6570  }
6571  
6572  static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6573  				dma_addr_t *dma_handle, gfp_t flags)
6574  {
6575  	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6576  }
6577  
6578  static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6579  				void *cpu_addr, dma_addr_t dma_handle)
6580  {
6581  	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6582  }
6583  
6584  static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6585  				u32 timeout, u64 *result)
6586  {
6587  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6588  
6589  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6590  		if (result)
6591  			*result = 0;
6592  		return 0;
6593  	}
6594  
6595  	if (!timeout)
6596  		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6597  
6598  	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6599  }
6600  
6601  static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6602  				gfp_t mem_flags, dma_addr_t *dma_handle)
6603  {
6604  	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6605  		return NULL;
6606  
6607  	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6608  }
6609  
6610  static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6611  {
6612  	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6613  }
6614  
6615  static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6616  						dma_addr_t *dma_handle)
6617  {
6618  	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6619  }
6620  
6621  static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6622  {
6623  	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6624  }
6625  
6626  static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6627  					enum dma_data_direction dir)
6628  {
6629  	dma_addr_t dma_addr;
6630  
6631  	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6632  	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6633  		return 0;
6634  
6635  	return dma_addr;
6636  }
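
/*
 * Minimal illustrative usage sketch (not part of the driver; the helper name
 * is hypothetical): gaudi2_dma_map_single() folds mapping failures into a 0
 * return value, so callers treat 0 as an error instead of calling
 * dma_mapping_error() themselves.
 */
static inline bool demo_map_buffer(struct hl_device *hdev, void *buf, int len,
					dma_addr_t *dma_addr)
{
	*dma_addr = gaudi2_dma_map_single(hdev, buf, len, DMA_BIDIRECTIONAL);

	return *dma_addr != 0;	/* 0 indicates the DMA mapping failed */
}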
6637  
6638  static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6639  					enum dma_data_direction dir)
6640  {
6641  	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6642  }
6643  
6644  static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6645  {
6646  	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6647  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6648  
6649  	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6650  		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6651  		return -EINVAL;
6652  	}
6653  
6654  	/* Just check if CB address is valid */
6655  
6656  	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6657  					parser->user_cb_size,
6658  					asic_prop->sram_user_base_address,
6659  					asic_prop->sram_end_address))
6660  		return 0;
6661  
6662  	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6663  					parser->user_cb_size,
6664  					asic_prop->dram_user_base_address,
6665  					asic_prop->dram_end_address))
6666  		return 0;
6667  
6668  	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6669  		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6670  						parser->user_cb_size,
6671  						asic_prop->dmmu.start_addr,
6672  						asic_prop->dmmu.end_addr))
6673  		return 0;
6674  
6675  	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6676  		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6677  					parser->user_cb_size,
6678  					asic_prop->pmmu.start_addr,
6679  					asic_prop->pmmu.end_addr) ||
6680  			hl_mem_area_inside_range(
6681  					(u64) (uintptr_t) parser->user_cb,
6682  					parser->user_cb_size,
6683  					asic_prop->pmmu_huge.start_addr,
6684  					asic_prop->pmmu_huge.end_addr))
6685  			return 0;
6686  
6687  	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6688  		if (!hdev->pdev)
6689  			return 0;
6690  
6691  		if (!device_iommu_mapped(&hdev->pdev->dev))
6692  			return 0;
6693  	}
6694  
6695  	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6696  		parser->user_cb, parser->user_cb_size);
6697  
6698  	return -EFAULT;
6699  }
6700  
6701  static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6702  {
6703  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6704  
6705  	if (!parser->is_kernel_allocated_cb)
6706  		return gaudi2_validate_cb_address(hdev, parser);
6707  
6708  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6709  		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6710  		return -EINVAL;
6711  	}
6712  
6713  	return 0;
6714  }
6715  
6716  static int gaudi2_send_heartbeat(struct hl_device *hdev)
6717  {
6718  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6719  
6720  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6721  		return 0;
6722  
6723  	return hl_fw_send_heartbeat(hdev);
6724  }
6725  
6726  /* This is an internal helper function, used to update the KDMA mmu props.
6727   * Should be called with a proper kdma lock.
6728   */
6729  static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6730  					   bool mmu_bypass, u32 asid)
6731  {
6732  	u32 rw_asid, rw_mmu_bp;
6733  
6734  	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6735  		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6736  
6737  	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6738  			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6739  
6740  	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6741  	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6742  }
6743  
6744  static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6745  						u32 mon_payload, u32 sync_value)
6746  {
6747  	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6748  	u8 mask;
6749  
6750  	sob_offset = sob_id * 4;
6751  	mon_offset = mon_id * 4;
6752  
6753  	/* Reset the SOB value */
6754  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6755  
6756  	/* Configure this address with CQ_ID 0 because CQ_EN is set */
6757  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6758  
6759  	/* Configure this address with CS index because CQ_EN is set */
6760  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6761  
6762  	sync_group_id = sob_id / 8;
6763  	mask = ~(1 << (sob_id & 0x7));
6764  	mode = 1; /* comparison mode is "equal to" */
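	/*
	 * For illustration: with sob_id = 10 the monitor is armed on sync group
	 * 10 / 8 = 1, and mask = (u8)~(1 << (10 & 0x7)) = ~(1 << 2) = 0xfb,
	 * which presumably masks out every SOB in that group except SOB 10, so
	 * the monitor fires when that SOB equals sync_value.
	 */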
6765  
6766  	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6767  	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6768  	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6769  	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6770  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6771  }
6772  
6773  /* gaudi2_arm_cq_monitor() above is an internal helper used by gaudi2_send_job_to_kdma() only */
6774  static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6775  					u64 src_addr, u64 dst_addr,
6776  					u32 size, bool is_memset)
6777  {
6778  	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6779  	struct hl_cq_entry *cq_base;
6780  	struct hl_cq *cq;
6781  	u64 comp_addr;
6782  	int rc;
6783  
6784  	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6785  				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6786  				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6787  
6788  	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6789  			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6790  
6791  	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6792  			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6793  
6794  	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6795  	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6796  	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6797  	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6798  	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6799  	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6800  	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6801  	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6802  
6803  	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6804  				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6805  
6806  	if (is_memset)
6807  		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6808  
6809  	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6810  
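	/*
	 * Completion flow, as set up above: once the transfer is done, the KDMA
	 * WR_COMP writes comp_val (an increment-by-1 of the reserved SOB) to
	 * comp_addr; the armed monitor then writes its payload (1) to the
	 * reserved CQ, whose entry is polled below.
	 */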
6811  	/* Wait for completion */
6812  	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6813  	cq_base = cq->kernel_address;
6814  	polling_addr = (u32 *)&cq_base[cq->ci];
6815  
6816  	if (hdev->pldm)
6817  		/* for each 1MB of transfer size, allow 20 seconds of timeout */
6818  		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6819  	else
6820  		timeout = KDMA_TIMEOUT_USEC;
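	/*
	 * Worked example: on PLDM a 4MB transfer gets
	 * ((4MB / 1MB) + 1) * 20 = 100 seconds of timeout, expressed in usec.
	 */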
6821  
6822  	/* Polling */
6823  	rc = hl_poll_timeout_memory(
6824  			hdev,
6825  			polling_addr,
6826  			status,
6827  			(status == 1),
6828  			1000,
6829  			timeout,
6830  			true);
6831  
6832  	*polling_addr = 0;
6833  
6834  	if (rc) {
6835  		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6836  		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6837  		return rc;
6838  	}
6839  
6840  	cq->ci = hl_cq_inc_ptr(cq->ci);
6841  
6842  	return 0;
6843  }
6844  
6845  static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6846  {
6847  	u32 i;
6848  
6849  	for (i = 0 ; i < size ; i += sizeof(u32))
6850  		WREG32(addr + i, val);
6851  }
6852  
6853  static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6854  {
6855  	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6856  
6857  	if (enable) {
6858  		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6859  		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6860  	} else {
6861  		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6862  		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6863  	}
6864  }
6865  
6866  static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6867  {
6868  	return hdev->asic_prop.first_available_user_sob[0] +
6869  				hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6870  }
6871  
6872  static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6873  {
6874  	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6875  	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6876  
6877  	/* Reset the SOB value */
6878  	WREG32(sob_addr, 0);
6879  }
6880  
6881  static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6882  					    struct gaudi2_queues_test_info *msg_info)
6883  {
6884  	u32 sob_offset =  gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6885  	u32 tmp, sob_base = 1;
6886  	struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6887  	size_t pkt_size = sizeof(struct packet_msg_short);
6888  	int rc;
6889  
6890  	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6891  		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6892  		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6893  		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6894  		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6895  
6896  	msg_short_pkt->value = cpu_to_le32(sob_val);
6897  	msg_short_pkt->ctl = cpu_to_le32(tmp);
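	/*
	 * Note: the MSG_SHORT packet built above writes sob_val into the SOB
	 * selected by sob_base/sob_offset (with the EB and MB barriers set);
	 * gaudi2_test_queue_wait_completion() later polls that same SOB for
	 * sob_val.
	 */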
6898  
6899  	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6900  	if (rc)
6901  		dev_err(hdev->dev,
6902  			"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6903  
6904  	return rc;
6905  }
6906  
6907  static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6908  {
6909  	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6910  	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6911  	u32 timeout_usec, tmp;
6912  	int rc;
6913  
6914  	if (hdev->pldm)
6915  		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6916  	else
6917  		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6918  
6919  	rc = hl_poll_timeout(
6920  			hdev,
6921  			sob_addr,
6922  			tmp,
6923  			(tmp == sob_val),
6924  			1000,
6925  			timeout_usec);
6926  
6927  	if (rc == -ETIMEDOUT) {
6928  		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6929  			hw_queue_id, tmp);
6930  		rc = -EIO;
6931  	}
6932  
6933  	return rc;
6934  }
6935  
6936  static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6937  {
6938  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6939  
6940  	/*
6941  	 * check capability here as send_cpu_message() won't update the result
6942  	 * value if no capability
6943  	 */
6944  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6945  		return 0;
6946  
6947  	return hl_fw_test_cpu_queue(hdev);
6948  }
6949  
6950  static int gaudi2_test_queues(struct hl_device *hdev)
6951  {
6952  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6953  	struct gaudi2_queues_test_info *msg_info;
6954  	u32 sob_val = 0x5a5a;
6955  	int i, rc;
6956  
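	/*
	 * Test packets are first submitted to every enabled queue and only then
	 * polled for completion, so the queues process them concurrently rather
	 * than one queue test at a time.
	 */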
6957  	/* send test message on all enabled Qs */
6958  	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6959  		if (!gaudi2_is_queue_enabled(hdev, i))
6960  			continue;
6961  
6962  		msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6963  		gaudi2_qman_set_test_mode(hdev, i, true);
6964  		gaudi2_test_queue_clear(hdev, i);
6965  		rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6966  		if (rc)
6967  			goto done;
6968  	}
6969  
6970  	rc = gaudi2_test_cpu_queue(hdev);
6971  	if (rc)
6972  		goto done;
6973  
6974  	/* verify that all messages were processed */
6975  	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6976  		if (!gaudi2_is_queue_enabled(hdev, i))
6977  			continue;
6978  
6979  		rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6980  		if (rc)
6981  			/* chip is not usable, no need for cleanups, just bail-out with error */
6982  			goto done;
6983  
6984  		gaudi2_test_queue_clear(hdev, i);
6985  		gaudi2_qman_set_test_mode(hdev, i, false);
6986  	}
6987  
6988  done:
6989  	return rc;
6990  }
6991  
6992  static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6993  {
6994  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6995  	size_t irq_arr_size;
6996  	int rc;
6997  
6998  	gaudi2_init_arcs(hdev);
6999  
7000  	rc = gaudi2_scrub_arcs_dccm(hdev);
7001  	if (rc) {
7002  		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7003  		return rc;
7004  	}
7005  
7006  	gaudi2_init_security(hdev);
7007  
7008  	/* Unmask all IRQs since some could have been received during the soft reset */
7009  	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7010  	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7011  }
7012  
7013  static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7014  		struct engines_data *e)
7015  {
7016  	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7017  	struct asic_fixed_properties *prop = &hdev->asic_prop;
7018  	unsigned long *mask = (unsigned long *) mask_arr;
7019  	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7020  	bool is_idle = true, is_eng_idle;
7021  	int engine_idx, i, j;
7022  	u64 offset;
7023  
7024  	if (e)
7025  		hl_engine_data_sprintf(e,
7026  			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7027  			"----  ----  -------  ------------  -------------  -------------\n");
7028  
7029  	for (i = 0; i < NUM_OF_DCORES; i++) {
7030  		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7031  			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7032  
7033  			if (!(prop->edma_enabled_mask & BIT(seq)))
7034  				continue;
7035  
7036  			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7037  					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7038  			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7039  
7040  			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7041  			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7042  
7043  			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7044  			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7045  			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7046  
7047  			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7048  					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7049  			is_idle &= is_eng_idle;
7050  
7051  			if (mask && !is_eng_idle)
7052  				set_bit(engine_idx, mask);
7053  
7054  			if (e)
7055  				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7056  							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7057  		}
7058  	}
7059  
7060  	return is_idle;
7061  }
7062  
7063  static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7064  		struct engines_data *e)
7065  {
7066  	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7067  	unsigned long *mask = (unsigned long *) mask_arr;
7068  	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7069  	bool is_idle = true, is_eng_idle;
7070  	int engine_idx, i;
7071  	u64 offset;
7072  
7073  	if (e)
7074  		hl_engine_data_sprintf(e,
7075  					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7076  					"----  -------  ------------  -------------  -------------\n");
7077  
7078  	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7079  		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7080  		offset = i * PDMA_OFFSET;
7081  		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7082  		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7083  
7084  		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7085  		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7086  		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7087  
7088  		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7089  				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7090  		is_idle &= is_eng_idle;
7091  
7092  		if (mask && !is_eng_idle)
7093  			set_bit(engine_idx, mask);
7094  
7095  		if (e)
7096  			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7097  						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7098  	}
7099  
7100  	return is_idle;
7101  }
7102  
7103  static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7104  		struct engines_data *e)
7105  {
7106  	unsigned long *mask = (unsigned long *) mask_arr;
7107  	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7108  	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7109  	bool is_idle = true, is_eng_idle;
7110  	int engine_idx, i;
7111  	u64 offset = 0;
7112  
7113  	/* NIC, twelve macros in Full chip */
7114  	if (e && hdev->nic_ports_mask)
7115  		hl_engine_data_sprintf(e,
7116  					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7117  					"---  -------  ------------  ----------\n");
7118  
7119  	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7120  		if (!(i & 1))
7121  			offset = i / 2 * NIC_OFFSET;
7122  		else
7123  			offset += NIC_QM_OFFSET;
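		/*
		 * Two NIC QMANs share each NIC macro: an even engine index starts
		 * a new macro at (i / 2) * NIC_OFFSET and the following odd index
		 * adds NIC_QM_OFFSET within the same macro, e.g.
		 * i = 4 -> 2 * NIC_OFFSET, i = 5 -> 2 * NIC_OFFSET + NIC_QM_OFFSET.
		 */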
7124  
7125  		if (!(hdev->nic_ports_mask & BIT(i)))
7126  			continue;
7127  
7128  		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7129  
7130  
7131  		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7132  		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7133  		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7134  
7135  		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7136  		is_idle &= is_eng_idle;
7137  
7138  		if (mask && !is_eng_idle)
7139  			set_bit(engine_idx, mask);
7140  
7141  		if (e)
7142  			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7143  						qm_glbl_sts0, qm_cgm_sts);
7144  	}
7145  
7146  	return is_idle;
7147  }
7148  
7149  static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7150  		struct engines_data *e)
7151  {
7152  	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7153  	unsigned long *mask = (unsigned long *) mask_arr;
7154  	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7155  	bool is_idle = true, is_eng_idle;
7156  	int engine_idx, i;
7157  	u64 offset;
7158  
7159  	if (e)
7160  		hl_engine_data_sprintf(e,
7161  					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7162  					"---  ----  -------  ------------  ---------------\n");
7163  	/* MME, one per Dcore */
7164  	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7165  		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7166  		offset = i * DCORE_OFFSET;
7167  
7168  		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7169  		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7170  		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7171  
7172  		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7173  		is_idle &= is_eng_idle;
7174  
7175  		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7176  		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7177  		is_idle &= is_eng_idle;
7178  
7179  		if (e)
7180  			hl_engine_data_sprintf(e, mme_fmt, i, "N",
7181  				is_eng_idle ? "Y" : "N",
7182  				qm_glbl_sts0,
7183  				mme_arch_sts);
7184  
7185  		if (mask && !is_eng_idle)
7186  			set_bit(engine_idx, mask);
7187  	}
7188  
7189  	return is_idle;
7190  }
7191  
7192  static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7193  					struct iterate_module_ctx *ctx)
7194  {
7195  	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7196  	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7197  	bool is_eng_idle;
7198  	int engine_idx;
7199  
7200  	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7201  		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7202  	else
7203  		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7204  				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7205  
7206  	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7207  	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7208  	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7209  	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7210  
7211  	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7212  						IS_TPC_IDLE(tpc_cfg_sts);
7213  	*(idle_data->is_idle) &= is_eng_idle;
7214  
7215  	if (idle_data->mask && !is_eng_idle)
7216  		set_bit(engine_idx, idle_data->mask);
7217  
7218  	if (idle_data->e)
7219  		hl_engine_data_sprintf(idle_data->e,
7220  					idle_data->tpc_fmt, dcore, inst,
7221  					is_eng_idle ? "Y" : "N",
7222  					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7223  }
7224  
7225  static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7226  		struct engines_data *e)
7227  {
7228  	struct asic_fixed_properties *prop = &hdev->asic_prop;
7229  	unsigned long *mask = (unsigned long *) mask_arr;
7230  	bool is_idle = true;
7231  
7232  	struct gaudi2_tpc_idle_data tpc_idle_data = {
7233  		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7234  		.e = e,
7235  		.mask = mask,
7236  		.is_idle = &is_idle,
7237  	};
7238  	struct iterate_module_ctx tpc_iter = {
7239  		.fn = &gaudi2_is_tpc_engine_idle,
7240  		.data = &tpc_idle_data,
7241  	};
7242  
7243  	if (e && prop->tpc_enabled_mask)
7244  		hl_engine_data_sprintf(e,
7245  			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7246  			"----  ---  -------  ------------  ----------  ------\n");
7247  
7248  	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7249  
7250  	return *tpc_idle_data.is_idle;
7251  }
7252  
7253  static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7254  		struct engines_data *e)
7255  {
7256  	struct asic_fixed_properties *prop = &hdev->asic_prop;
7257  	unsigned long *mask = (unsigned long *) mask_arr;
7258  	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7259  	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7260  	bool is_idle = true, is_eng_idle;
7261  	u32 dec_swreg15, dec_enabled_bit;
7262  	int engine_idx, i, j;
7263  	u64 offset;
7264  
7265  	/* Decoders, two per Dcore and two shared PCIe decoders */
7266  	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7267  		hl_engine_data_sprintf(e,
7268  			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7269  			"----  ---  -------  ---------------\n");
7270  
7271  	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7272  		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7273  			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7274  			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7275  				continue;
7276  
7277  			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7278  					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7279  			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7280  
7281  			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7282  			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7283  			is_idle &= is_eng_idle;
7284  
7285  			if (mask && !is_eng_idle)
7286  				set_bit(engine_idx, mask);
7287  
7288  			if (e)
7289  				hl_engine_data_sprintf(e, dec_fmt, i, j,
7290  							is_eng_idle ? "Y" : "N", dec_swreg15);
7291  		}
7292  	}
7293  
7294  	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7295  		hl_engine_data_sprintf(e,
7296  			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7297  			"--------  -------  ---------------\n");
7298  
7299  	/* Check shared(PCIe) decoders */
7300  	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7301  		dec_enabled_bit = PCIE_DEC_SHIFT + i;
7302  		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7303  			continue;
7304  
7305  		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7306  		offset = i * DCORE_DEC_OFFSET;
7307  		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7308  		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7309  		is_idle &= is_eng_idle;
7310  
7311  		if (mask && !is_eng_idle)
7312  			set_bit(engine_idx, mask);
7313  
7314  		if (e)
7315  			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7316  						is_eng_idle ? "Y" : "N", dec_swreg15);
7317  	}
7318  
7319  	return is_idle;
7320  }
7321  
7322  static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7323  		struct engines_data *e)
7324  {
7325  	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7326  	unsigned long *mask = (unsigned long *) mask_arr;
7327  	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7328  	bool is_idle = true, is_eng_idle;
7329  	int engine_idx, i;
7330  	u64 offset;
7331  
7332  	if (e)
7333  		hl_engine_data_sprintf(e,
7334  			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7335  			"----  ---  -------  ------------  ------------  ----------\n");
7336  
7337  	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7338  		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7339  
7340  		offset = i * ROT_OFFSET;
7341  
7342  		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7343  		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7344  		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7345  
7346  		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7347  		is_idle &= is_eng_idle;
7348  
7349  		if (mask && !is_eng_idle)
7350  			set_bit(engine_idx, mask);
7351  
7352  		if (e)
7353  			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7354  						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7355  	}
7356  
7357  	return is_idle;
7358  }
7359  
7360  static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7361  					struct engines_data *e)
7362  {
7363  	bool is_idle = true;
7364  
7365  	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7366  	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7367  	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7368  	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7369  	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7370  	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7371  	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7372  
7373  	return is_idle;
7374  }
7375  
7376  static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7377  	__acquires(&gaudi2->hw_queues_lock)
7378  {
7379  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7380  
7381  	spin_lock(&gaudi2->hw_queues_lock);
7382  }
7383  
7384  static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7385  	__releases(&gaudi2->hw_queues_lock)
7386  {
7387  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7388  
7389  	spin_unlock(&gaudi2->hw_queues_lock);
7390  }
7391  
7392  static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7393  {
7394  	return hdev->pdev->device;
7395  }
7396  
7397  static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7398  {
7399  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7400  
7401  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7402  		return 0;
7403  
7404  	return hl_fw_get_eeprom_data(hdev, data, max_size);
7405  }
7406  
7407  static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7408  {
7409  	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7410  }
7411  
7412  static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7413  {
7414  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7415  
7416  	if (aggregate) {
7417  		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
7418  		return gaudi2->events_stat_aggregate;
7419  	}
7420  
7421  	*size = (u32) sizeof(gaudi2->events_stat);
7422  	return gaudi2->events_stat;
7423  }
7424  
7425  static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7426  				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7427  {
7428  	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7429  			dcore_vdec_id + DCORE_OFFSET * dcore_id;
7430  
7431  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7432  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7433  
7434  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7435  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7436  
7437  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7438  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7439  
7440  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7441  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7442  
7443  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7444  	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7445  }
7446  
7447  static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7448  {
7449  	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7450  			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7451  	struct asic_fixed_properties *prop = &hdev->asic_prop;
7452  	u32 dcore_offset = dcore_id * DCORE_OFFSET;
7453  	u32 vdec_id, i, ports_offset, reg_val;
7454  	u8 edma_seq_base;
7455  
7456  	/* EDMA */
7457  	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7458  	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7459  		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7460  		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7461  		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7462  		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7463  	}
7464  
7465  	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7466  		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7467  		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7468  		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7469  		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7470  	}
7471  
7472  	/* Sync Mngr */
7473  	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7474  	/*
7475  	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7476  	 * for any access type
7477  	 */
7478  	if (dcore_id > 0) {
7479  		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7480  			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7481  		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7482  		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7483  	}
7484  
7485  	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7486  	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7487  
7488  	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7489  		ports_offset = i * DCORE_MME_SBTE_OFFSET;
7490  		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7491  				dcore_offset + ports_offset, 0);
7492  		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7493  				dcore_offset + ports_offset, rw_asid);
7494  	}
7495  
7496  	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7497  		ports_offset = i * DCORE_MME_WB_OFFSET;
7498  		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7499  				dcore_offset + ports_offset, 0);
7500  		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7501  				dcore_offset + ports_offset, rw_asid);
7502  	}
7503  
7504  	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7505  	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7506  
7507  	/*
7508  	 * Decoders
7509  	 */
7510  	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7511  		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7512  			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7513  	}
7514  }
7515  
7516  static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7517  				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7518  {
7519  	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7520  
7521  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7522  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7523  
7524  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7525  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7526  
7527  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7528  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7529  
7530  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7531  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7532  
7533  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7534  	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7535  }
7536  
7537  static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7538  							u32 rw_asid, u32 rw_mmu_bp)
7539  {
7540  	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7541  
7542  	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7543  	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7544  }
7545  
7546  static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7547  {
7548  	u32 reg_base, reg_offset, reg_val = 0;
7549  
7550  	reg_base = gaudi2_arc_blocks_bases[cpu_id];
7551  
7552  	/* Enable MMU and configure asid for all relevant ARC regions */
7553  	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7554  	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7555  
7556  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7557  	WREG32(reg_base + reg_offset, reg_val);
7558  
7559  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7560  	WREG32(reg_base + reg_offset, reg_val);
7561  
7562  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7563  	WREG32(reg_base + reg_offset, reg_val);
7564  
7565  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7566  	WREG32(reg_base + reg_offset, reg_val);
7567  
7568  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7569  	WREG32(reg_base + reg_offset, reg_val);
7570  
7571  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7572  	WREG32(reg_base + reg_offset, reg_val);
7573  
7574  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7575  	WREG32(reg_base + reg_offset, reg_val);
7576  
7577  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7578  	WREG32(reg_base + reg_offset, reg_val);
7579  
7580  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7581  	WREG32(reg_base + reg_offset, reg_val);
7582  
7583  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7584  	WREG32(reg_base + reg_offset, reg_val);
7585  
7586  	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7587  	WREG32(reg_base + reg_offset, reg_val);
7588  }
7589  
7590  static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7591  {
7592  	int i;
7593  
7594  	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7595  		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7596  
7597  	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7598  		gaudi2_arc_mmu_prepare(hdev, i, asid);
7599  
7600  	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7601  		if (!gaudi2_is_queue_enabled(hdev, i))
7602  			continue;
7603  
7604  		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7605  	}
7606  
7607  	return 0;
7608  }
7609  
7610  static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7611  {
7612  	struct asic_fixed_properties *prop = &hdev->asic_prop;
7613  	u32 rw_asid, offset;
7614  	int rc, i;
7615  
7616  	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7617  			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7618  
7619  	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7620  	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7621  	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7622  	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7623  
7624  	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7625  	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7626  	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7627  	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7628  
7629  	/* ROT */
7630  	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7631  		offset = i * ROT_OFFSET;
7632  		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7633  		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7634  		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7635  		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7636  		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7637  	}
7638  
7639  	/* Shared Decoders are the last bits in the decoders mask */
7640  	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7641  		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7642  
7643  	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7644  		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7645  
7646  	/* arc farm arc dup eng */
7647  	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7648  		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7649  
7650  	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7651  	if (rc)
7652  		return rc;
7653  
7654  	return 0;
7655  }
7656  
7657  static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7658  					struct iterate_module_ctx *ctx)
7659  {
7660  	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7661  
7662  	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7663  	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7664  	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7665  	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7666  }
7667  
7668  /* zero the MMUBP and set the ASID */
7669  static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7670  {
7671  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7672  	struct gaudi2_tpc_mmu_data tpc_mmu_data;
7673  	struct iterate_module_ctx tpc_iter = {
7674  		.fn = &gaudi2_tpc_mmu_prepare,
7675  		.data = &tpc_mmu_data,
7676  	};
7677  	int rc, i;
7678  
7679  	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7680  		dev_crit(hdev->dev, "asid %u is too big\n", asid);
7681  		return -EINVAL;
7682  	}
7683  
7684  	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7685  		return 0;
7686  
7687  	rc = gaudi2_mmu_shared_prepare(hdev, asid);
7688  	if (rc)
7689  		return rc;
7690  
7691  	/* configure DCORE MMUs */
7692  	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7693  				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7694  	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7695  	for (i = 0 ; i < NUM_OF_DCORES ; i++)
7696  		gaudi2_mmu_dcore_prepare(hdev, i, asid);
7697  
7698  	return 0;
7699  }
7700  
7701  static inline bool is_info_event(u32 event)
7702  {
7703  	switch (event) {
7704  	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7705  	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7706  
7707  	/* return in case of a NIC status event - these events are received periodically and are
7708  	 * not an indication of an error.
7709  	 */
7710  	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7711  		return true;
7712  	default:
7713  		return false;
7714  	}
7715  }
7716  
7717  static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7718  			bool ratelimited, const char *fmt, ...)
7719  {
7720  	struct va_format vaf;
7721  	va_list args;
7722  
7723  	va_start(args, fmt);
7724  	vaf.fmt = fmt;
7725  	vaf.va = &args;
7726  
7727  	if (ratelimited)
7728  		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7729  			gaudi2_irq_map_table[event_type].valid ?
7730  			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7731  	else
7732  		dev_err(hdev->dev, "%s: %pV\n",
7733  			gaudi2_irq_map_table[event_type].valid ?
7734  			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7735  
7736  	va_end(args);
7737  }
7738  
7739  static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7740  		struct hl_eq_ecc_data *ecc_data)
7741  {
7742  	u64 ecc_address = 0, ecc_syndrom = 0;
7743  	u8 memory_wrapper_idx = 0;
7744  
7745  	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7746  	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7747  	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7748  
7749  	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7750  		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7751  		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7752  
7753  	return !!ecc_data->is_critical;
7754  }
7755  
7756  static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
7757  {
7758  	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
7759  	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
7760  
7761  	lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
7762  	hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
7763  	cq_ptr = ((u64) hi) << 32 | lo;
7764  	cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
7765  
7766  	lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
7767  	hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
7768  	arc_cq_ptr = ((u64) hi) << 32 | lo;
7769  	arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
7770  
7771  	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7772  	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7773  	cp_current_inst = ((u64) hi) << 32 | lo;
7774  
7775  	dev_info(hdev->dev,
7776  		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
7777  		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
7778  }
7779  
7780  static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7781  							u64 qman_base, u32 qid_base)
7782  {
7783  	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7784  	u64 glbl_sts_addr, arb_err_addr;
7785  	char reg_desc[32];
7786  
7787  	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7788  	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7789  
7790  	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
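	/*
	 * The per-stream GLBL_ERR_STS registers are consecutive 32-bit registers
	 * (stream i is read at glbl_sts_addr + 4 * i); index QMAN_STREAMS selects
	 * the lower CP, whose bits are decoded with the LowerQM error-cause table
	 * rather than the per-stream one.
	 */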
7791  	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7792  		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7793  
7794  		if (!glbl_sts_val)
7795  			continue;
7796  
7797  		if (i == QMAN_STREAMS) {
7798  			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7799  			num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
7800  		} else {
7801  			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7802  			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7803  		}
7804  
7805  		for (j = 0 ; j < num_error_causes ; j++)
7806  			if (glbl_sts_val & BIT(j)) {
7807  				gaudi2_print_event(hdev, event_type, true,
7808  					"%s. err cause: %s", reg_desc,
7809  					i == QMAN_STREAMS ?
7810  					gaudi2_lower_qman_error_cause[j] :
7811  					gaudi2_qman_error_cause[j]);
7812  				error_count++;
7813  			}
7814  
7815  		if (i == QMAN_STREAMS)
7816  			print_lower_qman_data_on_err(hdev, qman_base);
7817  	}
7818  
7819  	arb_err_val = RREG32(arb_err_addr);
7820  
7821  	if (!arb_err_val)
7822  		goto out;
7823  
7824  	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7825  		if (arb_err_val & BIT(j)) {
7826  			gaudi2_print_event(hdev, event_type, true,
7827  				"ARB_ERR. err cause: %s",
7828  				gaudi2_qman_arb_error_cause[j]);
7829  			error_count++;
7830  		}
7831  	}
7832  
7833  out:
7834  	return error_count;
7835  }
7836  
7837  static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7838  			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7839  			enum gaudi2_engine_id id, u64 *event_mask)
7840  {
7841  	u32 razwi_hi, razwi_lo, razwi_xy;
7842  	u16 eng_id = id;
7843  	u8 rd_wr_flag;
7844  
7845  	if (is_write) {
7846  		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7847  		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7848  		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7849  		rd_wr_flag = HL_RAZWI_WRITE;
7850  	} else {
7851  		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7852  		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7853  		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7854  		rd_wr_flag = HL_RAZWI_READ;
7855  	}
7856  
7857  	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7858  				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7859  
7860  	dev_err_ratelimited(hdev->dev,
7861  		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7862  		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7863  }
7864  
7865  static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7866  			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7867  			enum gaudi2_engine_id id, u64 *event_mask)
7868  {
7869  	u64 razwi_addr = CFG_BASE;
7870  	u32 razwi_xy;
7871  	u16 eng_id = id;
7872  	u8 rd_wr_flag;
7873  
7874  	if (is_write) {
7875  		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7876  		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7877  		rd_wr_flag = HL_RAZWI_WRITE;
7878  	} else {
7879  		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7880  		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7881  		rd_wr_flag = HL_RAZWI_READ;
7882  	}
7883  
7884  	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7885  	dev_err_ratelimited(hdev->dev,
7886  				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7887  				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7888  						razwi_xy);
7889  }
7890  
7891  static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7892  						enum razwi_event_sources module, u8 module_idx)
7893  {
7894  	switch (module) {
7895  	case RAZWI_TPC:
7896  		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7897  			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7898  		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7899  				(module_idx % NUM_OF_TPC_PER_DCORE) +
7900  				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7901  
7902  	case RAZWI_MME:
7903  		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7904  			(module_idx * ENGINE_ID_DCORE_OFFSET));
7905  
7906  	case RAZWI_EDMA:
7907  		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7908  			(module_idx % NUM_OF_EDMA_PER_DCORE));
7909  
7910  	case RAZWI_PDMA:
7911  		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7912  
7913  	case RAZWI_NIC:
7914  		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7915  
7916  	case RAZWI_DEC:
7917  		if (module_idx == 8)
7918  			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7919  
7920  		if (module_idx == 9)
7921  			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7922
7923  		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7924  				(module_idx % NUM_OF_DEC_PER_DCORE) +
7925  				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7926  
7927  	case RAZWI_ROT:
7928  		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7929  
7930  	default:
7931  		return GAUDI2_ENGINE_ID_SIZE;
7932  	}
7933  }
7934  
7935  /*
7936   * This function handles RR(Range register) hit events.
7937   * raised be initiators not PSOC RAZWI.
7938   */
7939  static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7940  				enum razwi_event_sources module, u8 module_idx,
7941  				u8 module_sub_idx, u64 *event_mask)
7942  {
7943  	bool via_sft = false;
7944  	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
7945  	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
7946  	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7947  	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7948  	char initiator_name[64];
7949  
7950  	switch (module) {
7951  	case RAZWI_TPC:
7952  		sprintf(initiator_name, "TPC_%u", module_idx);
7953  		if (hdev->tpc_binning) {
7954  			binned_idx = __ffs(hdev->tpc_binning);
7955  			if (binned_idx == module_idx)
7956  				module_idx = TPC_ID_DCORE0_TPC6;
7957  		}
7958  
7959  		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
7960  
7961  		if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
7962  				!hdev->asic_prop.fw_security_enabled &&
7963  				((module_idx == 0) || (module_idx == 1)))
7964  			lbw_rtr_id = DCORE0_RTR0;
7965  		else
7966  			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
7967  		break;
7968  	case RAZWI_MME:
7969  		sprintf(initiator_name, "MME_%u", module_idx);
7970  		switch (module_sub_idx) {
7971  		case MME_WAP0:
7972  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7973  			break;
7974  		case MME_WAP1:
7975  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7976  			break;
7977  		case MME_WRITE:
7978  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7979  			break;
7980  		case MME_READ:
7981  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7982  			break;
7983  		case MME_SBTE0:
7984  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7985  			break;
7986  		case MME_SBTE1:
7987  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7988  			break;
7989  		case MME_SBTE2:
7990  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7991  			break;
7992  		case MME_SBTE3:
7993  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7994  			break;
7995  		case MME_SBTE4:
7996  			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7997  			break;
7998  		default:
7999  			return;
8000  		}
8001  		lbw_rtr_id = hbw_rtr_id;
8002  		break;
8003  	case RAZWI_EDMA:
8004  		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8005  		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8006  		/* SFT has separate MSTR_IF for LBW, only there we can
8007  		 * read the LBW razwi related registers
8008  		 */
8009  		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8010  								dcore_id * SFT_DCORE_OFFSET;
8011  		via_sft = true;
8012  		sprintf(initiator_name, "EDMA_%u", module_idx);
8013  		break;
8014  	case RAZWI_PDMA:
8015  		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8016  		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8017  		sprintf(initiator_name, "PDMA_%u", module_idx);
8018  		break;
8019  	case RAZWI_NIC:
8020  		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8021  		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8022  		sprintf(initiator_name, "NIC_%u", module_idx);
8023  		break;
8024  	case RAZWI_DEC:
8025  		sprintf(initiator_name, "DEC_%u", module_idx);
8026  		if (hdev->decoder_binning) {
8027  			binned_idx = __ffs(hdev->decoder_binning);
8028  			if (binned_idx == module_idx)
8029  				module_idx = DEC_ID_PCIE_VDEC1;
8030  		}
8031  		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8032  		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8033  		break;
8034  	case RAZWI_ROT:
8035  		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8036  		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8037  		sprintf(initiator_name, "ROT_%u", module_idx);
8038  		break;
8039  	default:
8040  		return;
8041  	}
8042  
8043  	/* Find router mstr_if register base */
8044  	if (!via_sft) {
8045  		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8046  		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8047  		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8048  				dcore_id * DCORE_OFFSET +
8049  				dcore_rtr_id * DCORE_RTR_OFFSET +
8050  				RTR_MSTR_IF_OFFSET;
8051  		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8052  				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8053  	}
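	/*
	 * The LBW MSTR_IF base above is derived from the HBW one by stepping
	 * (lbw_rtr_id - hbw_rtr_id) router blocks of DCORE_RTR_OFFSET each,
	 * which presumably places both interfaces at the same offset within
	 * their respective routers.
	 */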
8054  
8055  	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
8056  	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8057  	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8058  	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8059  	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8060  
8061  	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8062  	if (hbw_shrd_aw) {
8063  		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8064  						initiator_name, eng_id, event_mask);
8065  
8066  		/* Clear event indication */
8067  		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8068  	}
8069  
8070  	if (hbw_shrd_ar) {
8071  		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8072  						initiator_name, eng_id, event_mask);
8073  
8074  		/* Clear event indication */
8075  		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8076  	}
8077  
8078  	if (lbw_shrd_aw) {
8079  		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8080  						initiator_name, eng_id, event_mask);
8081  
8082  		/* Clear event indication */
8083  		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8084  	}
8085  
8086  	if (lbw_shrd_ar) {
8087  		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8088  						initiator_name, eng_id, event_mask);
8089  
8090  		/* Clear event indication */
8091  		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8092  	}
8093  }
8094  
8095  static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8096  {
8097  	struct asic_fixed_properties *prop = &hdev->asic_prop;
8098  	u8 mod_idx, sub_mod;
8099  
8100  	/* check all TPCs */
8101  	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8102  		if (prop->tpc_enabled_mask & BIT(mod_idx))
8103  			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8104  	}
8105  
8106  	/* check all MMEs */
8107  	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8108  		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8109  			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8110  									sub_mod, NULL);
8111  
8112  	/* check all EDMAs */
8113  	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8114  		if (prop->edma_enabled_mask & BIT(mod_idx))
8115  			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8116  
8117  	/* check all PDMAs */
8118  	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8119  		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8120  
8121  	/* check all NICs */
8122  	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8123  		if (hdev->nic_ports_mask & BIT(mod_idx))
8124  			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8125  								NULL);
8126  
8127  	/* check all DECs */
8128  	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8129  		if (prop->decoder_enabled_mask & BIT(mod_idx))
8130  			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8131  
8132  	/* check all ROTs */
8133  	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8134  		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8135  }
8136  
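/* Collect every engine in @razwi_info whose AXUSER X/Y coordinates match
 * @axuser_xy. Fills @base/@eng_id, builds a readable name in @eng_name and
 * returns the number of matching engines.
 */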
8137  static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8138  						u32 axuser_xy, u32 *base, u16 *eng_id,
8139  						char *eng_name)
8140  {
8141  
8142  	int i, num_of_eng = 0;
8143  	u16 str_size = 0;
8144  
8145  	for (i = 0 ; i < array_size ; i++) {
8146  		if (axuser_xy != razwi_info[i].axuser_xy)
8147  			continue;
8148  
8149  		eng_id[num_of_eng] = razwi_info[i].eng_id;
8150  		base[num_of_eng] = razwi_info[i].rtr_ctrl;
8151  		if (!num_of_eng)
8152  			str_size += scnprintf(eng_name + str_size,
8153  						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8154  						razwi_info[i].eng_name);
8155  		else
8156  			str_size += scnprintf(eng_name + str_size,
8157  						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8158  						razwi_info[i].eng_name);
8159  		num_of_eng++;
8160  	}
8161  
8162  	return num_of_eng;
8163  }
8164  
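/* Translate the PSOC RAZWI mask-info register into the initiating engine(s)
 * and report any captured HBW/LBW read/write address. Returns true if at
 * least one valid RAZWI capture was found.
 */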
8165  static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8166  						u64 *event_mask)
8167  {
8168  	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8169  	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8170  	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8171  	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8172  	bool razwi_happened = false;
8173  	u64 addr;
8174  	int i;
8175  
8176  	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8177  							axuser_xy, base, eng_id, eng_name_str);
8178  
8179  	/* If no match for XY coordinates, try to find it in MME razwi table */
8180  	if (!num_of_eng) {
8181  		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8182  		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8183  								ARRAY_SIZE(mme_razwi_info),
8184  								axuser_xy, base, eng_id,
8185  								eng_name_str);
8186  	}
8187  
8188  	for (i = 0 ; i < num_of_eng ; i++) {
8189  		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8190  			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8191  			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8192  			addr = ((u64)addr_hi << 32) + addr_lo;
8193  			if (addr) {
8194  				dev_err(hdev->dev,
8195  					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8196  					eng_name_str, addr);
8197  				hl_handle_razwi(hdev, addr, &eng_id[0],
8198  					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8199  				razwi_happened = true;
8200  			}
8201  		}
8202  
8203  		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8204  			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8205  			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8206  			addr = ((u64)addr_hi << 32) + addr_lo;
8207  			if (addr) {
8208  				dev_err(hdev->dev,
8209  					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8210  					eng_name_str, addr);
8211  				hl_handle_razwi(hdev, addr, &eng_id[0],
8212  					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8213  				razwi_happened = true;
8214  			}
8215  		}
8216  
8217  		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8218  			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8219  			if (addr_lo) {
8220  				dev_err(hdev->dev,
8221  					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8222  					eng_name_str, addr_lo);
8223  				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8224  					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8225  				razwi_happened = true;
8226  			}
8227  		}
8228  
8229  		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8230  			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8231  			if (addr_lo) {
8232  				dev_err(hdev->dev,
8233  						"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8234  						eng_name_str, addr_lo);
8235  				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8236  					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8237  				razwi_happened = true;
8238  			}
8239  		}
8240  		/* In the common case the loop breaks here, since there is either a single
8241  		 * engine id or several engines sharing the same router. The exception is a
8242  		 * PSOC RAZWI coming from EDMA, where the AXUSER id may match 2 routers (the
8243  		 * 2 interfaces of the SFT router). In that case the first router may not
8244  		 * hold the captured info and we need to iterate over the other router too.
8245  		 */
8246  		if (razwi_happened)
8247  			break;
8248  	}
8249  
8250  	return razwi_happened;
8251  }
8252  
8253  /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8254  static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8255  {
8256  	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8257  
8258  	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8259  		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8260  		if (!razwi_intr)
8261  			return 0;
8262  	}
8263  
8264  	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8265  
8266  	dev_err_ratelimited(hdev->dev,
8267  		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8268  		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8269  		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8270  		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8271  		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8272  		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8273  
8274  	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8275  		error_count++;
8276  	else
8277  		dev_err_ratelimited(hdev->dev,
8278  				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8279  				razwi_mask_info);
8280  
8281  	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8282  	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8283  		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8284  
8285  	return error_count;
8286  }
8287  
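/* Report and clear all set bits in a single QMAN SEI status register.
 * Returns the number of reported error causes.
 */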
8288  static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8289  {
8290  	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8291  
8292  	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8293  
8294  	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8295  		if (sts_val & BIT(i)) {
8296  			gaudi2_print_event(hdev, event_type, true,
8297  				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
8298  			sts_clr_val |= BIT(i);
8299  			error_count++;
8300  		}
8301  	}
8302  
8303  	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8304  
8305  	return error_count;
8306  }
8307  
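/* Map an AXI error-response event to the QMAN block that raised it, report its
 * SEI causes and, when extended checking is requested, also look for an
 * accompanying RAZWI.
 */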
8308  static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8309  					bool extended_err_check, u64 *event_mask)
8310  {
8311  	enum razwi_event_sources module;
8312  	u32 error_count = 0;
8313  	u64 qman_base;
8314  	u8 index;
8315  
8316  	switch (event_type) {
8317  	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8318  		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8319  		qman_base = mmDCORE0_TPC0_QM_BASE +
8320  				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8321  				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8322  		module = RAZWI_TPC;
8323  		break;
8324  	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8325  		qman_base = mmDCORE0_TPC6_QM_BASE;
8326  		module = RAZWI_TPC;
8327  		break;
8328  	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8329  	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8330  	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8331  	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8332  		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8333  				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8334  						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8335  		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8336  		module = RAZWI_MME;
8337  		break;
8338  	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8339  	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8340  		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8341  		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8342  		module = RAZWI_PDMA;
8343  		break;
8344  	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8345  	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8346  		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8347  		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8348  		module = RAZWI_ROT;
8349  		break;
8350  	default:
8351  		return 0;
8352  	}
8353  
8354  	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8355  
8356  	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
8357  	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8358  			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8359  		error_count += _gaudi2_handle_qm_sei_err(hdev,
8360  					qman_base + NIC_QM_OFFSET, event_type);
8361  
8362  	if (extended_err_check) {
8363  		/* check if RAZWI happened */
8364  		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8365  		hl_check_for_glbl_errors(hdev);
8366  	}
8367  
8368  	return error_count;
8369  }
8370  
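/* Resolve the QMAN base address and base queue ID for a QM error event and run
 * the generic QMAN error handler; EDMA QMs also get their SEI status checked
 * here, as they have no dedicated AXI error response event.
 */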
8371  static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8372  {
8373  	u32 qid_base, error_count = 0;
8374  	u64 qman_base;
8375  	u8 index = 0;
8376  
8377  	switch (event_type) {
8378  	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8379  		index = event_type - GAUDI2_EVENT_TPC0_QM;
8380  		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8381  		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8382  		break;
8383  	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8384  		index = event_type - GAUDI2_EVENT_TPC6_QM;
8385  		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8386  		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8387  		break;
8388  	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8389  		index = event_type - GAUDI2_EVENT_TPC12_QM;
8390  		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8391  		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8392  		break;
8393  	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8394  		index = event_type - GAUDI2_EVENT_TPC18_QM;
8395  		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8396  		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8397  		break;
8398  	case GAUDI2_EVENT_TPC24_QM:
8399  		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8400  		qman_base = mmDCORE0_TPC6_QM_BASE;
8401  		break;
8402  	case GAUDI2_EVENT_MME0_QM:
8403  		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8404  		qman_base = mmDCORE0_MME_QM_BASE;
8405  		break;
8406  	case GAUDI2_EVENT_MME1_QM:
8407  		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8408  		qman_base = mmDCORE1_MME_QM_BASE;
8409  		break;
8410  	case GAUDI2_EVENT_MME2_QM:
8411  		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8412  		qman_base = mmDCORE2_MME_QM_BASE;
8413  		break;
8414  	case GAUDI2_EVENT_MME3_QM:
8415  		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8416  		qman_base = mmDCORE3_MME_QM_BASE;
8417  		break;
8418  	case GAUDI2_EVENT_HDMA0_QM:
8419  		index = 0;
8420  		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8421  		qman_base = mmDCORE0_EDMA0_QM_BASE;
8422  		break;
8423  	case GAUDI2_EVENT_HDMA1_QM:
8424  		index = 1;
8425  		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8426  		qman_base = mmDCORE0_EDMA1_QM_BASE;
8427  		break;
8428  	case GAUDI2_EVENT_HDMA2_QM:
8429  		index = 2;
8430  		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8431  		qman_base = mmDCORE1_EDMA0_QM_BASE;
8432  		break;
8433  	case GAUDI2_EVENT_HDMA3_QM:
8434  		index = 3;
8435  		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8436  		qman_base = mmDCORE1_EDMA1_QM_BASE;
8437  		break;
8438  	case GAUDI2_EVENT_HDMA4_QM:
8439  		index = 4;
8440  		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8441  		qman_base = mmDCORE2_EDMA0_QM_BASE;
8442  		break;
8443  	case GAUDI2_EVENT_HDMA5_QM:
8444  		index = 5;
8445  		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8446  		qman_base = mmDCORE2_EDMA1_QM_BASE;
8447  		break;
8448  	case GAUDI2_EVENT_HDMA6_QM:
8449  		index = 6;
8450  		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8451  		qman_base = mmDCORE3_EDMA0_QM_BASE;
8452  		break;
8453  	case GAUDI2_EVENT_HDMA7_QM:
8454  		index = 7;
8455  		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8456  		qman_base = mmDCORE3_EDMA1_QM_BASE;
8457  		break;
8458  	case GAUDI2_EVENT_PDMA0_QM:
8459  		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8460  		qman_base = mmPDMA0_QM_BASE;
8461  		break;
8462  	case GAUDI2_EVENT_PDMA1_QM:
8463  		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8464  		qman_base = mmPDMA1_QM_BASE;
8465  		break;
8466  	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8467  		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8468  		qman_base = mmROT0_QM_BASE;
8469  		break;
8470  	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8471  		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8472  		qman_base = mmROT1_QM_BASE;
8473  		break;
8474  	default:
8475  		return 0;
8476  	}
8477  
8478  	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8479  
8480  	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8481  	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8482  		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8483  		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8484  	}
8485  
8486  	hl_check_for_glbl_errors(hdev);
8487  
8488  	return error_count;
8489  }
8490  
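/* Walk all ARC-farm ARCs, reporting and clearing any pending SEI causes. */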
8491  static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8492  {
8493  	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8494  
8495  	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8496  		sts_clr_val = 0;
8497  		sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8498  				(arc_farm * ARC_FARM_OFFSET));
8499  
8500  		for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8501  			if (sts_val & BIT(i)) {
8502  				gaudi2_print_event(hdev, event_type, true,
8503  						"ARC FARM ARC %u err cause: %s",
8504  						arc_farm, gaudi2_arc_sei_error_cause[i]);
8505  				sts_clr_val |= BIT(i);
8506  				error_count++;
8507  			}
8508  		}
8509  		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8510  				sts_clr_val);
8511  	}
8512  
8513  	hl_check_for_glbl_errors(hdev);
8514  
8515  	return error_count;
8516  }
8517  
8518  static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8519  {
8520  	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8521  
8522  	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8523  
8524  	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8525  		if (sts_val & BIT(i)) {
8526  			gaudi2_print_event(hdev, event_type, true,
8527  				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8528  			sts_clr_val |= BIT(i);
8529  			error_count++;
8530  		}
8531  	}
8532  
8533  	hl_check_for_glbl_errors(hdev);
8534  
8535  	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8536  
8537  	return error_count;
8538  }
8539  
8540  static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8541  					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8542  					u64 *event_mask)
8543  {
8544  	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8545  	u32 error_count = 0;
8546  	int i;
8547  
8548  	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8549  		if (intr_cause_data & BIT(i)) {
8550  			gaudi2_print_event(hdev, event_type, true,
8551  				"err cause: %s", guadi2_rot_error_cause[i]);
8552  			error_count++;
8553  		}
8554  
8555  	/* check if RAZWI happened */
8556  	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8557  	hl_check_for_glbl_errors(hdev);
8558  
8559  	return error_count;
8560  }
8561  
8562  static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev,  u8 tpc_index, u16 event_type,
8563  					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8564  					u64 *event_mask)
8565  {
8566  	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8567  	u32 error_count = 0;
8568  	int i;
8569  
8570  	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8571  		if (intr_cause_data & BIT(i)) {
8572  			gaudi2_print_event(hdev, event_type, true,
8573  				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8574  			error_count++;
8575  		}
8576  
8577  	/* check if RAZWI happened */
8578  	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8579  	hl_check_for_glbl_errors(hdev);
8580  
8581  	return error_count;
8582  }
8583  
8584  static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8585  					u64 *event_mask)
8586  {
8587  	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8588  	int i;
8589  
8590  	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8591  		/* DCORE DEC */
8592  		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8593  				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8594  				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8595  	else
8596  		/* PCIE DEC */
8597  		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8598  				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8599  
8600  	sts_val = RREG32(sts_addr);
8601  
8602  	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8603  		if (sts_val & BIT(i)) {
8604  			gaudi2_print_event(hdev, event_type, true,
8605  				"err cause: %s", gaudi2_dec_error_cause[i]);
8606  			sts_clr_val |= BIT(i);
8607  			error_count++;
8608  		}
8609  	}
8610  
8611  	/* check if RAZWI happened */
8612  	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8613  	hl_check_for_glbl_errors(hdev);
8614  
8615  	/* Write 1 to clear errors */
8616  	WREG32(sts_addr, sts_clr_val);
8617  
8618  	return error_count;
8619  }
8620  
8621  static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8622  					u64 *event_mask)
8623  {
8624  	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8625  	int i;
8626  
8627  	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8628  	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8629  
8630  	sts_val = RREG32(sts_addr);
8631  
8632  	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8633  		if (sts_val & BIT(i)) {
8634  			gaudi2_print_event(hdev, event_type, true,
8635  				"err cause: %s", guadi2_mme_error_cause[i]);
8636  			sts_clr_val |= BIT(i);
8637  			error_count++;
8638  		}
8639  	}
8640  
8641  	/* check if RAZWI happened */
8642  	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8643  		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8644  
8645  	hl_check_for_glbl_errors(hdev);
8646  
8647  	WREG32(sts_clr_addr, sts_clr_val);
8648  
8649  	return error_count;
8650  }
8651  
8652  static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8653  					u64 intr_cause_data)
8654  {
8655  	int i, error_count = 0;
8656  
8657  	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8658  		if (intr_cause_data & BIT(i)) {
8659  			gaudi2_print_event(hdev, event_type, true,
8660  				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8661  			error_count++;
8662  		}
8663  
8664  	hl_check_for_glbl_errors(hdev);
8665  
8666  	return error_count;
8667  }
8668  
8669  static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8670  					u64 *event_mask)
8671  {
8672  	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8673  	int i;
8674  
8675  	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8676  	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8677  
8678  	sts_val = RREG32(sts_addr);
8679  
8680  	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8681  		if (sts_val & BIT(i)) {
8682  			gaudi2_print_event(hdev, event_type, true,
8683  				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8684  			sts_clr_val |= BIT(i);
8685  			error_count++;
8686  		}
8687  	}
8688  
8689  	/* check if RAZWI happened on WAP0/1 */
8690  	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8691  	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8692  	hl_check_for_glbl_errors(hdev);
8693  
8694  	WREG32(sts_clr_addr, sts_clr_val);
8695  
8696  	return error_count;
8697  }
8698  
8699  static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8700  					u64 intr_cause_data)
8701  {
8702  	u32 error_count = 0;
8703  	int i;
8704  
8705  	/* If an AXI read or write error is received, an error is reported and an
8706  	 * interrupt message is sent. Due to an HW errata, when reading the cause
8707  	 * register of the KDMA engine, the reported error is always HBW, even if
8708  	 * the actual error was caused by an LBW KDMA transaction.
8709  	 */
8710  	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8711  		if (intr_cause_data & BIT(i)) {
8712  			gaudi2_print_event(hdev, event_type, true,
8713  				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8714  			error_count++;
8715  		}
8716  
8717  	hl_check_for_glbl_errors(hdev);
8718  
8719  	return error_count;
8720  }
8721  
8722  static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8723  {
8724  	u32 error_count = 0;
8725  	int i;
8726  
8727  	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8728  		if (intr_cause & BIT(i)) {
8729  			gaudi2_print_event(hdev, event_type, true,
8730  				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8731  			error_count++;
8732  		}
8733  
8734  	hl_check_for_glbl_errors(hdev);
8735  
8736  	return error_count;
8737  }
8738  
8739  static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8740  {
8741  	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8742  
8743  	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8744  	if (RREG32(razwi_happened_addr)) {
8745  		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8746  							GAUDI2_ENGINE_ID_PCIE, event_mask);
8747  		WREG32(razwi_happened_addr, 0x1);
8748  	}
8749  
8750  	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8751  	if (RREG32(razwi_happened_addr)) {
8752  		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8753  							GAUDI2_ENGINE_ID_PCIE, event_mask);
8754  		WREG32(razwi_happened_addr, 0x1);
8755  	}
8756  
8757  	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8758  	if (RREG32(razwi_happened_addr)) {
8759  		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8760  							GAUDI2_ENGINE_ID_PCIE, event_mask);
8761  		WREG32(razwi_happened_addr, 0x1);
8762  	}
8763  
8764  	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8765  	if (RREG32(razwi_happened_addr)) {
8766  		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8767  							GAUDI2_ENGINE_ID_PCIE, event_mask);
8768  		WREG32(razwi_happened_addr, 0x1);
8769  	}
8770  }
8771  
8772  static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8773  					u64 intr_cause_data, u64 *event_mask)
8774  {
8775  	u32 error_count = 0;
8776  	int i;
8777  
8778  	gaudi2_print_event(hdev, event_type, true,
8779  		"intr_cause_data: %#llx", intr_cause_data);
8780  
8781  	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8782  		if (!(intr_cause_data & BIT_ULL(i)))
8783  			continue;
8784  
8785  		gaudi2_print_event(hdev, event_type, true,
8786  			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8787  		error_count++;
8788  
8789  		/*
8790  		 * Always check for LBW and HBW additional info as the indication itself is
8791  		 * sometimes missing
8792  		 */
8793  	}
8794  
8795  	hl_check_for_glbl_errors(hdev);
8796  	gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8797  
8798  	return error_count;
8799  }
8800  
8801  static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8802  				u64 intr_cause_data)
8803  
8804  {
8805  	u32 error_count = 0;
8806  	int i;
8807  
8808  	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8809  		if (intr_cause_data & BIT_ULL(i)) {
8810  			gaudi2_print_event(hdev, event_type, true,
8811  				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8812  			error_count++;
8813  		}
8814  	}
8815  
8816  	return error_count;
8817  }
8818  
8819  static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8820  {
8821  	u32 error_count = 0;
8822  	int i;
8823  
8824  	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8825  		if (intr_cause_data & BIT_ULL(i)) {
8826  			gaudi2_print_event(hdev, event_type, true,
8827  				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8828  			error_count++;
8829  		}
8830  	}
8831  
8832  	return error_count;
8833  }
8834  
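/* Report a captured MMU page fault, if one is valid. HMMU addresses are
 * descrambled and reported as a range, since only the HW-unscrambled bits of
 * the faulting VA are meaningful.
 */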
8835  static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8836  					u64 *event_mask)
8837  {
8838  	u32 valid, val;
8839  	u64 addr;
8840  
8841  	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8842  
8843  	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8844  		return;
8845  
8846  	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8847  	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8848  	addr <<= 32;
8849  	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8850  
8851  	if (is_pmmu) {
8852  		dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
8853  	} else {
8854  
8855  		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8856  		addr &= HW_UNSCRAMBLED_BITS_MASK;
8857  		dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
8858  				addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
8859  	}
8860  
8861  	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8862  
8863  	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8864  }
8865  
8866  static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8867  {
8868  	u32 valid, val;
8869  	u64 addr;
8870  
8871  	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8872  
8873  	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8874  		return;
8875  
8876  	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8877  	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8878  	addr <<= 32;
8879  	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8880  
8881  	if (!is_pmmu)
8882  		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8883  
8884  	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8885  				is_pmmu ? "PMMU" : "HMMU", addr);
8886  	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8887  }
8888  
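/* Report the pending SPI/SEI causes of a single MMU instance, handle page
 * fault and access-error captures, then clear the cause and interrupt
 * registers.
 */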
8889  static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8890  						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8891  {
8892  	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8893  	int i;
8894  
8895  	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8896  
8897  	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8898  		if (spi_sei_cause & BIT(i)) {
8899  			gaudi2_print_event(hdev, event_type, true,
8900  				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8901  
8902  			if (i == 0)
8903  				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8904  			else if (i == 1)
8905  				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8906  
8907  			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8908  				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8909  
8910  			error_count++;
8911  		}
8912  	}
8913  
8914  	/* Clear cause */
8915  	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8916  
8917  	/* Clear interrupt */
8918  	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8919  
8920  	return error_count;
8921  }
8922  
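/* Report sync-manager SEI and CQ interrupts for the given SM instance and
 * clear the captured causes.
 */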
8923  static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8924  {
8925  	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8926  		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8927  	int i;
8928  
8929  	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8930  	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8931  
8932  	sei_cause_val = RREG32(sei_cause_addr);
8933  	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8934  	cq_intr_val = RREG32(cq_intr_addr);
8935  
8936  	/* SEI interrupt */
8937  	if (sei_cause_cause) {
8938  		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8939  		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8940  					sei_cause_val);
8941  
8942  		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8943  			if (!(sei_cause_cause & BIT(i)))
8944  				continue;
8945  
8946  			gaudi2_print_event(hdev, event_type, true,
8947  				"err cause: %s. %s: 0x%X",
8948  				gaudi2_sm_sei_cause[i].cause_name,
8949  				gaudi2_sm_sei_cause[i].log_name,
8950  				sei_cause_log);
8951  			error_count++;
8952  			break;
8953  		}
8954  
8955  		/* Clear SM_SEI_CAUSE */
8956  		WREG32(sei_cause_addr, 0);
8957  	}
8958  
8959  	/* CQ interrupt */
8960  	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8961  		cq_intr_queue_index =
8962  				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8963  					cq_intr_val);
8964  
8965  		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8966  				sm_index, cq_intr_queue_index);
8967  		error_count++;
8968  
8969  		/* Clear CQ_INTR */
8970  		WREG32(cq_intr_addr, 0);
8971  	}
8972  
8973  	hl_check_for_glbl_errors(hdev);
8974  
8975  	return error_count;
8976  }
8977  
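/* Translate an HMMU event type to the MMIO base of the HMMU instance that
 * raised it, or ULONG_MAX if the event does not belong to any HMMU.
 */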
8978  static u64 get_hmmu_base(u16 event_type)
8979  {
8980  	u8 dcore, index_in_dcore;
8981  
8982  	switch (event_type) {
8983  	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
8984  	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
8985  		dcore = 0;
8986  		index_in_dcore = 0;
8987  	break;
8988  	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
8989  	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
8990  		dcore = 1;
8991  		index_in_dcore = 0;
8992  	break;
8993  	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
8994  	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
8995  		dcore = 0;
8996  		index_in_dcore = 1;
8997  	break;
8998  	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8999  	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9000  		dcore = 1;
9001  		index_in_dcore = 1;
9002  	break;
9003  	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9004  	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9005  		dcore = 3;
9006  		index_in_dcore = 2;
9007  	break;
9008  	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9009  	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9010  		dcore = 2;
9011  		index_in_dcore = 2;
9012  	break;
9013  	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9014  	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9015  		dcore = 3;
9016  		index_in_dcore = 3;
9017  	break;
9018  	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9019  	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9020  		dcore = 2;
9021  		index_in_dcore = 3;
9022  	break;
9023  	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9024  	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9025  		dcore = 0;
9026  		index_in_dcore = 2;
9027  	break;
9028  	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9029  	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9030  		dcore = 1;
9031  		index_in_dcore = 2;
9032  	break;
9033  	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9034  	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9035  		dcore = 0;
9036  		index_in_dcore = 3;
9037  	break;
9038  	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9039  	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9040  		dcore = 1;
9041  		index_in_dcore = 3;
9042  	break;
9043  	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9044  	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9045  		dcore = 3;
9046  		index_in_dcore = 0;
9047  	break;
9048  	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9049  	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9050  		dcore = 2;
9051  		index_in_dcore = 0;
9052  	break;
9053  	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9054  	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9055  		dcore = 3;
9056  		index_in_dcore = 1;
9057  	break;
9058  	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9059  	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9060  		dcore = 2;
9061  		index_in_dcore = 1;
9062  	break;
9063  	default:
9064  		return ULONG_MAX;
9065  	}
9066  
9067  	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9068  }
9069  
9070  static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9071  {
9072  	bool is_pmmu = false;
9073  	u32 error_count = 0;
9074  	u64 mmu_base;
9075  
9076  	switch (event_type) {
9077  	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9078  	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9079  		mmu_base = get_hmmu_base(event_type);
9080  		break;
9081  
9082  	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9083  	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9084  		is_pmmu = true;
9085  		mmu_base = mmPMMU_HBW_MMU_BASE;
9086  		break;
9087  	default:
9088  		return 0;
9089  	}
9090  
9091  	if (mmu_base == ULONG_MAX)
9092  		return 0;
9093  
9094  	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9095  							is_pmmu, event_mask);
9096  	hl_check_for_glbl_errors(hdev);
9097  
9098  	return error_count;
9099  }
9100  
9101  
9102  /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9103  static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9104  			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9105  {
9106  	u32 addr, beat, beat_shift;
9107  	bool rc = false;
9108  
9109  	dev_err_ratelimited(hdev->dev,
9110  			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9111  			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9112  			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9113  			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9114  
9115  	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9116  	dev_err_ratelimited(hdev->dev,
9117  			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9118  			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9119  			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9120  			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9121  			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9122  			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9123  
9124  	/* For each beat (RDQS edge), look for possible errors and print relevant info */
9125  	for (beat = 0 ; beat < 4 ; beat++) {
9126  		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9127  			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9128  			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9129  						beat,
9130  						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9131  						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9132  
9133  		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9134  			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9135  			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9136  						beat,
9137  						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9138  						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9139  			rc |= true;
9140  		}
9141  
9142  		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9143  		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9144  			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9145  			dev_err_ratelimited(hdev->dev,
9146  					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9147  					beat,
9148  					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9149  					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9150  						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9151  						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9152  			rc |= true;
9153  		}
9154  
9155  		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9156  		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9157  					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9158  		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9159  					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9160  	}
9161  
9162  	return rc;
9163  }
9164  
9165  static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9166  			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9167  {
9168  	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9169  	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9170  
9171  	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9172  
9173  	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9174  				derr & 0x3, derr & 0xc);
9175  
9176  	/* JIRA H6-3286 - the following prints may not be valid */
9177  	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9178  	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9179  		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9180  		dev_err_ratelimited(hdev->dev,
9181  				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9182  				i,
9183  				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9184  				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9185  				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9186  				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9187  	}
9188  }
9189  
9190  static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9191  		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9192  {
9193  	__le32 *col_cmd = ca_par_err_data->dbg_col;
9194  	__le16 *row_cmd = ca_par_err_data->dbg_row;
9195  	u32 i;
9196  
9197  	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9198  
9199  	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9200  	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9201  		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9202  			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9203  			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9204  }
9205  
9206  /* Returns true if hard reset is needed or false otherwise */
9207  static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9208  					struct hl_eq_hbm_sei_data *sei_data)
9209  {
9210  	bool require_hard_reset = false;
9211  	u32 hbm_id, mc_id, cause_idx;
9212  
9213  	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9214  	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9215  
9216  	cause_idx = sei_data->hdr.sei_cause;
9217  	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9218  		gaudi2_print_event(hdev, event_type, true,
9219  			"err cause: Invalid HBM SEI event cause (%d) provided by FW",
9220  			cause_idx);
9221  		return true;
9222  	}
9223  
9224  	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9225  		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9226  		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9227  		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9228  		hbm_mc_sei_cause[cause_idx]);
9229  
9230  	/* Print error-specific info */
9231  	switch (cause_idx) {
9232  	case HBM_SEI_CATTRIP:
9233  		require_hard_reset = true;
9234  		break;
9235  
9236  	case  HBM_SEI_CMD_PARITY_EVEN:
9237  		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9238  						le32_to_cpu(sei_data->hdr.cnt));
9239  		require_hard_reset = true;
9240  		break;
9241  
9242  	case  HBM_SEI_CMD_PARITY_ODD:
9243  		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9244  						le32_to_cpu(sei_data->hdr.cnt));
9245  		require_hard_reset = true;
9246  		break;
9247  
9248  	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9249  		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9250  						le32_to_cpu(sei_data->hdr.cnt));
9251  		require_hard_reset = true;
9252  		break;
9253  
9254  	case HBM_SEI_READ_ERR:
9255  		/* Unlike other SEI events, read error requires further processing of the
9256  		 * raw data in order to determine the root cause.
9257  		 */
9258  		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9259  								&sei_data->read_err_info,
9260  								le32_to_cpu(sei_data->hdr.cnt));
9261  		break;
9262  
9263  	default:
9264  		break;
9265  	}
9266  
9267  	require_hard_reset |= !!sei_data->hdr.is_critical;
9268  
9269  	return require_hard_reset;
9270  }
9271  
9272  static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9273  				u64 intr_cause_data)
9274  {
9275  	if (intr_cause_data) {
9276  		gaudi2_print_event(hdev, event_type, true,
9277  			"temperature error cause: %#llx", intr_cause_data);
9278  		return 1;
9279  	}
9280  
9281  	return 0;
9282  }
9283  
9284  static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9285  {
9286  	u32 i, error_count = 0;
9287  
9288  	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9289  		if (intr_cause_data & hbm_mc_spi[i].mask) {
9290  			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9291  				hbm_mc_spi[i].cause);
9292  			error_count++;
9293  		}
9294  
9295  	return error_count;
9296  }
9297  
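/* Track FW-reported clock-throttling state: update the current and aggregated
 * throttle reasons and timestamp the start/end of each throttling period.
 */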
9298  static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9299  {
9300  	ktime_t zero_time = ktime_set(0, 0);
9301  
9302  	mutex_lock(&hdev->clk_throttling.lock);
9303  
9304  	switch (event_type) {
9305  	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9306  		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9307  		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9308  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9309  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9310  		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9311  		break;
9312  
9313  	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9314  		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9315  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9316  		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
9317  		break;
9318  
9319  	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9320  		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9321  		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9322  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9323  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9324  		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9325  		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9326  		break;
9327  
9328  	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9329  		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9330  		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9331  		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9332  		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
9333  		break;
9334  
9335  	default:
9336  		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9337  		break;
9338  	}
9339  
9340  	mutex_unlock(&hdev->clk_throttling.lock);
9341  }
9342  
9343  static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9344  					struct cpucp_pkt_sync_err *sync_err)
9345  {
9346  	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9347  
9348  	gaudi2_print_event(hdev, event_type, false,
9349  		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9350  		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9351  		q->pi, atomic_read(&q->ci));
9352  }
9353  
9354  static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9355  {
9356  	u32 p2p_intr, msix_gw_intr, error_count = 0;
9357  
9358  	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9359  	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9360  
9361  	if (p2p_intr) {
9362  		gaudi2_print_event(hdev, event_type, true,
9363  			"pcie p2p transaction terminated due to security, req_id(0x%x)",
9364  			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9365  
9366  		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9367  		error_count++;
9368  	}
9369  
9370  	if (msix_gw_intr) {
9371  		gaudi2_print_event(hdev, event_type, true,
9372  			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9373  			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9374  
9375  		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9376  		error_count++;
9377  	}
9378  
9379  	return error_count;
9380  }
9381  
9382  static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9383  			struct hl_eq_pcie_drain_ind_data *drain_data)
9384  {
9385  	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9386  
9387  	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9388  	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9389  	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9390  	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9391  	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9392  
9393  	if (cause & BIT_ULL(0)) {
9394  		dev_err_ratelimited(hdev->dev,
9395  			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9396  			!!lbw_rd, !!lbw_wr);
9397  		error_count++;
9398  	}
9399  
9400  	if (cause & BIT_ULL(1)) {
9401  		dev_err_ratelimited(hdev->dev,
9402  			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9403  			hbw_rd, hbw_wr);
9404  		error_count++;
9405  	}
9406  
9407  	return error_count;
9408  }
9409  
9410  static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9411  {
9412  	u32 error_count = 0;
9413  	int i;
9414  
9415  	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9416  		if (intr_cause_data & BIT_ULL(i)) {
9417  			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9418  				gaudi2_psoc_axi_drain_interrupts_cause[i]);
9419  			error_count++;
9420  		}
9421  	}
9422  
9423  	hl_check_for_glbl_errors(hdev);
9424  
9425  	return error_count;
9426  }
9427  
9428  static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9429  					struct cpucp_pkt_sync_err *sync_err)
9430  {
9431  	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9432  
9433  	gaudi2_print_event(hdev, event_type, false,
9434  		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9435  		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9436  }
9437  
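/* Handle an engine-ARC interrupt event. Currently only the DCCM-queue-full
 * interrupt type carries a decoded payload; other types are reported as
 * unknown.
 */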
9438  static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9439  					struct hl_eq_engine_arc_intr_data *data)
9440  {
9441  	struct hl_engine_arc_dccm_queue_full_irq *q;
9442  	u32 intr_type, engine_id;
9443  	u64 payload;
9444  
9445  	intr_type = le32_to_cpu(data->intr_type);
9446  	engine_id = le32_to_cpu(data->engine_id);
9447  	payload = le64_to_cpu(data->payload);
9448  
9449  	switch (intr_type) {
9450  	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9451  		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9452  
9453  		gaudi2_print_event(hdev, event_type, true,
9454  				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9455  				engine_id, intr_type, q->queue_index);
9456  		return 1;
9457  	default:
9458  		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9459  		return 0;
9460  	}
9461  }
9462  
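/* Main event-queue dispatcher: decode the event type, update the event
 * statistics, invoke the matching handler(s) and accumulate the reset flags
 * and user notification mask.
 */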
9463  static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9464  {
9465  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9466  	bool reset_required = false, is_critical = false;
9467  	u32 index, ctl, reset_flags = 0, error_count = 0;
9468  	u64 event_mask = 0;
9469  	u16 event_type;
9470  
9471  	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9472  	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9473  
9474  	if (event_type >= GAUDI2_EVENT_SIZE) {
9475  		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9476  				event_type, GAUDI2_EVENT_SIZE - 1);
9477  		return;
9478  	}
9479  
9480  	gaudi2->events_stat[event_type]++;
9481  	gaudi2->events_stat_aggregate[event_type]++;
9482  
9483  	switch (event_type) {
9484  	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9485  		fallthrough;
9486  	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9487  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9488  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9489  		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9490  		is_critical = eq_entry->ecc_data.is_critical;
9491  		error_count++;
9492  		break;
9493  
9494  	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9495  		fallthrough;
9496  	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9497  		fallthrough;
9498  	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9499  		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9500  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9501  		break;
9502  
9503  	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9504  		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9505  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9506  		break;
9507  
9508  	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9509  		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9510  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9511  		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9512  		break;
9513  
9514  	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9515  	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9516  		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9517  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9518  		break;
9519  
9520  	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9521  	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9522  		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9523  		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9524  					&eq_entry->razwi_with_intr_cause, &event_mask);
9525  		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9526  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9527  		break;
9528  
9529  	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9530  		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9531  		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9532  						&eq_entry->razwi_with_intr_cause, &event_mask);
9533  		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9534  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9535  		break;
9536  
9537  	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9538  		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9539  		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9540  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9541  		break;
9542  
9543  	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9544  	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9545  	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9546  	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9547  	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9548  	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9549  	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9550  	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9551  	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9552  	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9553  	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9554  	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9555  	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9556  	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9557  	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9558  	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9559  	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9560  	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9561  	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9562  	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9563  	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9564  	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9565  	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9566  	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9567  	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9568  		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9569  			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9570  		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9571  					&eq_entry->razwi_with_intr_cause, &event_mask);
9572  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9573  		break;
9574  
9575  	case GAUDI2_EVENT_DEC0_SPI:
9576  	case GAUDI2_EVENT_DEC1_SPI:
9577  	case GAUDI2_EVENT_DEC2_SPI:
9578  	case GAUDI2_EVENT_DEC3_SPI:
9579  	case GAUDI2_EVENT_DEC4_SPI:
9580  	case GAUDI2_EVENT_DEC5_SPI:
9581  	case GAUDI2_EVENT_DEC6_SPI:
9582  	case GAUDI2_EVENT_DEC7_SPI:
9583  	case GAUDI2_EVENT_DEC8_SPI:
9584  	case GAUDI2_EVENT_DEC9_SPI:
9585  		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9586  				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9587  		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9588  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9589  		break;
9590  
9591  	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9592  	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9593  	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9594  	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9595  		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9596  				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9597  						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9598  		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9599  		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9600  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9601  		break;
9602  
9603  	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9604  	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9605  	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9606  	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9607  		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9608  				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9609  					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9610  		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9611  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9612  		break;
9613  
9614  	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9615  	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9616  	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9617  	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9618  		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9619  				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9620  					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9621  		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9622  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9623  		break;
9624  
9625  	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9626  	case GAUDI2_EVENT_KDMA0_CORE:
9627  		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9628  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9629  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9630  		break;
9631  
9632  	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9633  		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9634  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9635  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9636  		break;
9637  
9638  	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9639  		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9640  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9641  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9642  		break;
9643  
9644  	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9645  		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9646  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9647  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9648  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9649  		break;
9650  
9651  	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9652  	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9653  	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9654  	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9655  		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9656  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9657  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9658  		break;
9659  
9660  	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9661  		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9662  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9663  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9664  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9665  		break;
9666  
9667  	case GAUDI2_EVENT_PMMU_FATAL_0:
9668  		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9669  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9670  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9671  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9672  		break;
9673  
9674  	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9675  		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9676  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9677  		break;
9678  
9679  	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9680  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9681  		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9682  			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9683  			reset_required = true;
9684  		}
9685  		error_count++;
9686  		break;
9687  
9688  	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9689  		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9690  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9691  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9692  		break;
9693  
9694  	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9695  		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9696  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9697  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9698  		break;
9699  
9700  	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9701  		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9702  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9703  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9704  		break;
9705  
9706  	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9707  		error_count = gaudi2_handle_psoc_drain(hdev,
9708  				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9709  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9710  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9711  		break;
9712  
9713  	case GAUDI2_EVENT_CPU_AXI_ECC:
9714  		error_count = GAUDI2_NA_EVENT_CAUSE;
9715  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9716  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9717  		break;
9718  	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9719  		error_count = GAUDI2_NA_EVENT_CAUSE;
9720  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9721  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9722  		break;
9723  	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9724  	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9725  	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9726  	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9727  		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9728  						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9729  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9730  		break;
9731  	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9732  		error_count = GAUDI2_NA_EVENT_CAUSE;
9733  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9734  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9735  		break;
9736  	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9737  		error_count = GAUDI2_NA_EVENT_CAUSE;
9738  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9739  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9740  		break;
9741  	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9742  		error_count = GAUDI2_NA_EVENT_CAUSE;
9743  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9744  		break;
9745  	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9746  		error_count = GAUDI2_NA_EVENT_CAUSE;
9747  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9748  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9749  		break;
9750  	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9751  		error_count = GAUDI2_NA_EVENT_CAUSE;
9752  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9753  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9754  		break;
9755  	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9756  	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9757  	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9758  	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9759  	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9760  	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9761  	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9762  	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9763  	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9764  	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9765  	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9766  	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9767  	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9768  	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9769  	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9770  	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9771  	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9772  	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9773  	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9774  	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9775  	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9776  	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9777  	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9778  	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9779  	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9780  	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9781  	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9782  	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9783  	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9784  	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9785  	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9786  	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9787  	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9788  	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9789  	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9790  	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9791  	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9792  	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9793  		fallthrough;
9794  	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9795  	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9796  	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9797  	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9798  	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9799  	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9800  	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9801  	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9802  	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9803  	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9804  	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9805  		error_count = GAUDI2_NA_EVENT_CAUSE;
9806  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9807  		break;
9808  
9809  	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9810  	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9811  	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9812  	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9813  		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9814  		error_count = GAUDI2_NA_EVENT_CAUSE;
9815  		break;
9816  
9817  	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9818  		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9819  		error_count = GAUDI2_NA_EVENT_CAUSE;
9820  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9821  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9822  		break;
9823  
9824  	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9825  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9826  		error_count = GAUDI2_NA_EVENT_CAUSE;
9827  		/* Do nothing - FW will handle it */
9828  		break;
9829  
9830  	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9831  		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9832  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9833  		break;
9834  
9835  	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9836  		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9837  		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9838  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9839  		break;
9840  
9841  	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9842  		error_count = GAUDI2_NA_EVENT_CAUSE;
9843  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9844  		break;
9845  
9846  	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9847  		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9848  						le64_to_cpu(eq_entry->data[0]));
9849  		error_count = GAUDI2_NA_EVENT_CAUSE;
9850  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9851  		break;
9852  	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9853  		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9854  						le64_to_cpu(eq_entry->data[0]));
9855  		error_count = GAUDI2_NA_EVENT_CAUSE;
9856  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9857  		break;
9858  
9859  	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9860  		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9861  		error_count = GAUDI2_NA_EVENT_CAUSE;
9862  		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9863  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9864  		break;
9865  
9866  	case GAUDI2_EVENT_ARC_DCCM_FULL:
9867  		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9868  		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9869  		break;
9870  
9871  	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9872  	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9873  		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9874  		error_count = GAUDI2_NA_EVENT_CAUSE;
9875  		is_critical = true;
9876  		break;
9877  
9878  	default:
9879  		if (gaudi2_irq_map_table[event_type].valid) {
9880  			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9881  						event_type);
9882  			error_count = GAUDI2_NA_EVENT_CAUSE;
9883  		}
9884  	}
9885  
9886  	/* Make sure to dump an error in case no error cause was printed so far.
9887  	 * Note that although we have counted the errors, we use this number as
9888  	 * a boolean.
9889  	 */
9890  	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9891  		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9892  	else if (error_count == 0)
9893  		gaudi2_print_event(hdev, event_type, true,
9894  				"No error cause for H/W event %u", event_type);
9895  
9896  	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9897  				reset_required) {
9898  		if (reset_required ||
9899  				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9900  			reset_flags |= HL_DRV_RESET_HARD;
9901  
9902  		if (hdev->hard_reset_on_fw_events ||
9903  				(hdev->asic_prop.fw_security_enabled && is_critical))
9904  			goto reset_device;
9905  	}
9906  
9907  	/* Send unmask irq only for interrupts not classified as MSG */
9908  	if (!gaudi2_irq_map_table[event_type].msg)
9909  		hl_fw_unmask_irq(hdev, event_type);
9910  
9911  	if (event_mask)
9912  		hl_notifier_event_send_all(hdev, event_mask);
9913  
9914  	return;
9915  
9916  reset_device:
9917  	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9918  		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9919  		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9920  	} else {
9921  		reset_flags |= HL_DRV_RESET_DELAY;
9922  	}
9923  	/* escalate general hw errors to critical/fatal error */
9924  	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
9925  		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
9926  
9927  	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9928  	hl_device_cond_reset(hdev, reset_flags, event_mask);
9929  }
9930  
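/*
 * gaudi2_memset_memory_chunk_using_edma_qm() - build a single LIN_DMA packet in
 * memset mode (the fill value is carried in src_addr) with write-completion
 * enabled, and submit it to the given EDMA QMAN without waiting for completion.
 */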
9931  static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9932  			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9933  			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9934  {
9935  	u32 ctl, pkt_size;
9936  	int rc = 0;
9937  
9938  	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9939  	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9940  	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9941  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9942  
9943  	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9944  	lin_dma_pkt->src_addr = cpu_to_le64(val);
9945  	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9946  	lin_dma_pkt->tsize = cpu_to_le32(size);
9947  
9948  	pkt_size = sizeof(struct packet_lin_dma);
9949  
9950  	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9951  	if (rc)
9952  		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9953  				hw_queue_id);
9954  
9955  	return rc;
9956  }
9957  
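/*
 * gaudi2_memset_device_memory() - fill a device memory range with a value by
 * issuing LIN_DMA memset packets of up to 2GB each, round-robin over all
 * enabled EDMA QMANs. The EDMA cores are temporarily switched to MMU bypass
 * and their write-completion is pointed at a sync object, which is polled
 * until it reaches the number of packets that were sent. The original EDMA
 * configuration is restored before returning.
 */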
9958  static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9959  {
9960  	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9961  					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9962  					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9963  					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9964  	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9965  		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9966  	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9967  	struct asic_fixed_properties *prop = &hdev->asic_prop;
9968  	void *lin_dma_pkts_arr;
9969  	dma_addr_t pkt_dma_addr;
9970  	int rc = 0, dma_num = 0;
9971  
9972  	if (prop->edma_enabled_mask == 0) {
9973  		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip DRAM scrubbing\n");
9974  		return -EIO;
9975  	}
9976  
9977  	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9978  	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9979  	comp_addr = CFG_BASE + sob_addr;
9980  	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9981  		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9982  	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9983  		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9984  
9985  	/* Calculate how many lin dma pkts we'll need */
9986  	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9987  	pkt_size = sizeof(struct packet_lin_dma);
9988  
9989  	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9990  					&pkt_dma_addr, GFP_KERNEL);
9991  	if (!lin_dma_pkts_arr)
9992  		return -ENOMEM;
9993  
9994  	/*
9995  	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
9996  	 * so save only the first one in order to restore it later.
9997  	 * Also set the SOB address on all EDMA cores for completion.
9998  	 * Set the QM as trusted to allow it to access a physical address with MMU bypass.
9999  	 */
10000  	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10001  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10002  		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10003  			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10004  			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10005  
10006  			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10007  				continue;
10008  
10009  			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10010  					edma_offset, mmubp);
10011  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10012  					lower_32_bits(comp_addr));
10013  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10014  					upper_32_bits(comp_addr));
10015  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10016  					comp_val);
10017  			gaudi2_qman_set_test_mode(hdev,
10018  					edma_queues_id[dcore] + 4 * edma_idx, true);
10019  		}
10020  	}
10021  
10022  	WREG32(sob_addr, 0);
10023  
10024  	while (cur_addr < end_addr) {
10025  		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10026  			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10027  				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10028  
10029  				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10030  					continue;
10031  
10032  				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10033  
10034  				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10035  					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10036  					pkt_dma_addr + dma_num * pkt_size,
10037  					edma_queues_id[dcore] + edma_idx * 4,
10038  					chunk_size, cur_addr, val);
10039  				if (rc)
10040  					goto end;
10041  
10042  				dma_num++;
10043  				cur_addr += chunk_size;
10044  				if (cur_addr == end_addr)
10045  					break;
10046  			}
10047  		}
10048  	}
10049  
10050  	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10051  	if (rc) {
10052  		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10053  		goto end;
10054  	}
10055  end:
10056  	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10057  		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10058  			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10059  			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10060  
10061  			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10062  				continue;
10063  
10064  			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10065  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10066  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10067  			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10068  			gaudi2_qman_set_test_mode(hdev,
10069  					edma_queues_id[dcore] + 4 * edma_idx, false);
10070  		}
10071  	}
10072  
10073  	WREG32(sob_addr, 0);
10074  	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10075  
10076  	return rc;
10077  }
10078  
10079  static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10080  {
10081  	int rc;
10082  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10083  	u64 size = prop->dram_end_address - prop->dram_user_base_address;
10084  
10085  	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10086  
10087  	if (rc)
10088  		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10089  				prop->dram_user_base_address, size);
10090  	return rc;
10091  }
10092  
10093  static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10094  {
10095  	int rc;
10096  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10097  	u64 val = hdev->memory_scrub_val;
10098  	u64 addr, size;
10099  
10100  	if (!hdev->memory_scrub)
10101  		return 0;
10102  
10103  	/* scrub SRAM */
10104  	addr = prop->sram_user_base_address;
10105  	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10106  	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10107  			addr, addr + size, val);
10108  	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10109  	if (rc) {
10110  		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10111  		return rc;
10112  	}
10113  
10114  	/* scrub DRAM */
10115  	rc = gaudi2_scrub_device_dram(hdev, val);
10116  	if (rc) {
10117  		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10118  		return rc;
10119  	}
10120  	return 0;
10121  }
10122  
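/*
 * gaudi2_restore_user_sm_registers() - clear the user-available sync manager
 * resources in all dcores: CQ configuration registers, monitor status/config
 * registers and sync objects. For dcore0 only the range starting at the first
 * user-available index is cleared, while for the other dcores the whole range
 * is cleared. A final read flushes the writes.
 */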
10123  static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10124  {
10125  	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10126  		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10127  	u32 val, size, offset;
10128  	int dcore_id;
10129  
10130  	offset = hdev->asic_prop.first_available_cq[0] * 4;
10131  	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10132  	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10133  	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10134  	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10135  	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10136  	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10137  	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10138  			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10139  
10140  	/* memset dcore0 CQ registers */
10141  	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10142  	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10143  	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10144  	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10145  	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10146  	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10147  
10148  	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10149  	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10150  	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10151  	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10152  	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10153  	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10154  	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10155  
10156  	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10157  		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10158  		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10159  		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10160  		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10161  		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10162  		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10163  
10164  		cq_lbw_l_addr += DCORE_OFFSET;
10165  		cq_lbw_h_addr += DCORE_OFFSET;
10166  		cq_lbw_data_addr += DCORE_OFFSET;
10167  		cq_base_l_addr += DCORE_OFFSET;
10168  		cq_base_h_addr += DCORE_OFFSET;
10169  		cq_size_addr += DCORE_OFFSET;
10170  	}
10171  
10172  	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10173  	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10174  	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10175  	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10176  
10177  	/* memset dcore0 monitors */
10178  	gaudi2_memset_device_lbw(hdev, addr, size, val);
10179  
10180  	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10181  	gaudi2_memset_device_lbw(hdev, addr, size, 0);
10182  
10183  	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10184  	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10185  	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10186  
10187  	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10188  		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10189  		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10190  		mon_sts_addr += DCORE_OFFSET;
10191  		mon_cfg_addr += DCORE_OFFSET;
10192  	}
10193  
10194  	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10195  	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10196  	val = 0;
10197  	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10198  			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10199  
10200  	/* memset dcore0 sobs */
10201  	gaudi2_memset_device_lbw(hdev, addr, size, val);
10202  
10203  	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10204  	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10205  
10206  	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10207  		gaudi2_memset_device_lbw(hdev, addr, size, val);
10208  		addr += DCORE_OFFSET;
10209  	}
10210  
10211  	/* Flush all WREG to prevent race */
10212  	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10213  }
10214  
10215  static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10216  {
10217  	u32 reg_base, hw_queue_id;
10218  
10219  	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10220  							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10221  		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10222  			continue;
10223  
10224  		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10225  
10226  		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10227  		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10228  	}
10229  
10230  	/* Flush all WREG to prevent race */
10231  	RREG32(mmPDMA0_QM_ARB_CFG_0);
10232  }
10233  
10234  static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10235  {
10236  	u32 reg_base, hw_queue_id;
10237  
10238  	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10239  							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10240  		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10241  			continue;
10242  
10243  		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10244  
10245  		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10246  		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10247  	}
10248  
10249  	/* Flush all WREG to prevent race */
10250  	RREG32(mmPDMA0_QM_ARB_CFG_0);
10251  }
10252  
10253  static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10254  {
10255  	return 0;
10256  }
10257  
10258  static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10259  {
10260  }
10261  
10262  static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10263  						struct dup_block_ctx *cfg_ctx)
10264  {
10265  	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10266  	u8 seq;
10267  	int i;
10268  
10269  	for (i = 0 ; i < cfg_ctx->instances ; i++) {
10270  		seq = block_idx * cfg_ctx->instances + i;
10271  
10272  		/* skip disabled instance */
10273  		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10274  			continue;
10275  
10276  		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10277  					cfg_ctx->data);
10278  	}
10279  }
10280  
10281  static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10282  						u64 mask)
10283  {
10284  	int i;
10285  
10286  	cfg_ctx->enabled_mask = mask;
10287  
10288  	for (i = 0 ; i < cfg_ctx->blocks ; i++)
10289  		gaudi2_init_block_instances(hdev, i, cfg_ctx);
10290  }
10291  
10292  void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10293  {
10294  	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10295  }
10296  
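/*
 * gaudi2_debugfs_read_dma() - read a device memory range into a host buffer
 * using the KDMA engine. A 2MB host bounce buffer is allocated and mapped into
 * the device MMU at a reserved host VA, KDMA is switched to the context's ASID,
 * and the range is copied in chunks of up to 2MB before the mapping, the VA
 * reservation and the bounce buffer are released.
 */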
10297  static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10298  {
10299  	void *host_mem_virtual_addr;
10300  	dma_addr_t host_mem_dma_addr;
10301  	u64 reserved_va_base;
10302  	u32 pos, size_left, size_to_dma;
10303  	struct hl_ctx *ctx;
10304  	int rc = 0;
10305  
10306  	/* Fetch the ctx */
10307  	ctx = hl_get_compute_ctx(hdev);
10308  	if (!ctx) {
10309  		dev_err(hdev->dev, "No ctx available\n");
10310  		return -EINVAL;
10311  	}
10312  
10313  	/* Allocate buffers for read and for poll */
10314  	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10315  								GFP_KERNEL | __GFP_ZERO);
10316  	if (host_mem_virtual_addr == NULL) {
10317  		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10318  		rc = -ENOMEM;
10319  		goto put_ctx;
10320  	}
10321  
10322  	/* Reserve VM region on asic side */
10323  	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10324  						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10325  	if (!reserved_va_base) {
10326  		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10327  		rc = -ENOMEM;
10328  		goto free_data_buffer;
10329  	}
10330  
10331  	/* Create mapping on asic side */
10332  	mutex_lock(&hdev->mmu_lock);
10333  
10334  	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10335  	if (rc) {
10336  		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10337  		goto unreserve_va;
10338  	}
10339  
10340  	rc = hl_mmu_invalidate_cache_range(hdev, false,
10341  				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10342  				      ctx->asid, reserved_va_base, SZ_2M);
10343  	if (rc) {
10344  		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10345  		goto unreserve_va;
10346  	}
10347  
10348  	mutex_unlock(&hdev->mmu_lock);
10349  
10350  	/* Enable MMU on KDMA */
10351  	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10352  
10353  	pos = 0;
10354  	size_left = size;
10355  	size_to_dma = SZ_2M;
10356  
10357  	while (size_left > 0) {
10358  		if (size_left < SZ_2M)
10359  			size_to_dma = size_left;
10360  
10361  		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10362  		if (rc)
10363  			break;
10364  
10365  		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10366  
10367  		if (size_left <= SZ_2M)
10368  			break;
10369  
10370  		pos += SZ_2M;
10371  		addr += SZ_2M;
10372  		size_left -= SZ_2M;
10373  	}
10374  
10375  	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10376  
10377  	mutex_lock(&hdev->mmu_lock);
10378  
10379  	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10380  	if (rc)
10381  		goto unreserve_va;
10382  
10383  	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10384  				      ctx->asid, reserved_va_base, SZ_2M);
10385  
10386  unreserve_va:
10387  	mutex_unlock(&hdev->mmu_lock);
10388  	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10389  free_data_buffer:
10390  	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10391  put_ctx:
10392  	hl_ctx_put(ctx);
10393  
10394  	return rc;
10395  }
10396  
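/*
 * gaudi2_internal_cb_pool_init() - allocate a coherent host buffer for internal
 * command buffers, back it with a gen_pool allocator, and map it contiguously
 * at a reserved host VA so the device can access the CBs through the PMMU.
 * Skipped when the PMMU is not initialized.
 */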
10397  static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10398  {
10399  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10400  	int min_alloc_order, rc;
10401  
10402  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10403  		return 0;
10404  
10405  	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10406  								HOST_SPACE_INTERNAL_CB_SZ,
10407  								&hdev->internal_cb_pool_dma_addr,
10408  								GFP_KERNEL | __GFP_ZERO);
10409  
10410  	if (!hdev->internal_cb_pool_virt_addr)
10411  		return -ENOMEM;
10412  
10413  	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10414  					gaudi2_get_wait_cb_size(hdev)));
10415  
10416  	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10417  	if (!hdev->internal_cb_pool) {
10418  		dev_err(hdev->dev, "Failed to create internal CB pool\n");
10419  		rc = -ENOMEM;
10420  		goto free_internal_cb_pool;
10421  	}
10422  
10423  	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10424  				HOST_SPACE_INTERNAL_CB_SZ, -1);
10425  	if (rc) {
10426  		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10427  		rc = -EFAULT;
10428  		goto destroy_internal_cb_pool;
10429  	}
10430  
10431  	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10432  					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10433  
10434  	if (!hdev->internal_cb_va_base) {
10435  		rc = -ENOMEM;
10436  		goto destroy_internal_cb_pool;
10437  	}
10438  
10439  	mutex_lock(&hdev->mmu_lock);
10440  
10441  	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10442  					HOST_SPACE_INTERNAL_CB_SZ);
10443  	if (rc)
10444  		goto unreserve_internal_cb_pool;
10445  
10446  	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10447  	if (rc)
10448  		goto unmap_internal_cb_pool;
10449  
10450  	mutex_unlock(&hdev->mmu_lock);
10451  
10452  	return 0;
10453  
10454  unmap_internal_cb_pool:
10455  	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10456  unreserve_internal_cb_pool:
10457  	mutex_unlock(&hdev->mmu_lock);
10458  	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10459  destroy_internal_cb_pool:
10460  	gen_pool_destroy(hdev->internal_cb_pool);
10461  free_internal_cb_pool:
10462  	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10463  					hdev->internal_cb_pool_dma_addr);
10464  
10465  	return rc;
10466  }
10467  
10468  static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10469  {
10470  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10471  
10472  	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10473  		return;
10474  
10475  	mutex_lock(&hdev->mmu_lock);
10476  	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10477  	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10478  	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10479  	mutex_unlock(&hdev->mmu_lock);
10480  
10481  	gen_pool_destroy(hdev->internal_cb_pool);
10482  
10483  	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10484  					hdev->internal_cb_pool_dma_addr);
10485  }
10486  
10487  static void gaudi2_restore_user_registers(struct hl_device *hdev)
10488  {
10489  	gaudi2_restore_user_sm_registers(hdev);
10490  	gaudi2_restore_user_qm_registers(hdev);
10491  }
10492  
10493  static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10494  {
10495  	struct hl_device *hdev = ctx->hdev;
10496  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10497  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10498  	int rc;
10499  
10500  	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10501  				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10502  	if (rc)
10503  		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10504  			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10505  
10506  	return rc;
10507  }
10508  
10509  static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10510  {
10511  	struct hl_device *hdev = ctx->hdev;
10512  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10513  	int rc;
10514  
10515  	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10516  				prop->pmmu.page_size, true);
10517  	if (rc)
10518  		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10519  			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10520  }
10521  
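/*
 * gaudi2_ctx_init() - prepare the MMU for the context's ASID, restore the user
 * registers (only the NIC QM registers if the device was just reset upon device
 * release), initialize the internal CB pool and map the virtual MSI-X doorbell
 * page for this context.
 */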
10522  static int gaudi2_ctx_init(struct hl_ctx *ctx)
10523  {
10524  	int rc;
10525  
10526  	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10527  	if (rc)
10528  		return rc;
10529  
10530  	/* No need to clear the user registers if the device has just been
10531  	 * reset; in that case we restore only the NIC QM registers.
10532  	 */
10533  	if (ctx->hdev->reset_upon_device_release)
10534  		gaudi2_restore_nic_qm_registers(ctx->hdev);
10535  	else
10536  		gaudi2_restore_user_registers(ctx->hdev);
10537  
10538  	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10539  	if (rc)
10540  		return rc;
10541  
10542  	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10543  	if (rc)
10544  		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10545  
10546  	return rc;
10547  }
10548  
10549  static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10550  {
10551  	if (ctx->asid == HL_KERNEL_ASID_ID)
10552  		return;
10553  
10554  	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10555  
10556  	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10557  }
10558  
10559  static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10560  {
10561  	struct hl_device *hdev = cs->ctx->hdev;
10562  	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10563  	u32 mon_payload, sob_id, mon_id;
10564  
10565  	if (!cs_needs_completion(cs))
10566  		return 0;
10567  
10568  	/*
10569  	 * The first 64 SOB/MON pairs are reserved for the driver for the QMAN
10570  	 * auto-completion mechanism. Each SOB/MON pair is used for a pending CS
10571  	 * with the same cyclic index. The SOB value is increased when each of
10572  	 * the CS jobs is completed. When the SOB reaches the number of CS jobs,
10573  	 * the monitor generates an MSI-X interrupt.
10574  	 */
10575  
10576  	sob_id = mon_id = index;
10577  	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10578  				(1 << CQ_ENTRY_READY_SHIFT) | index;
10579  
10580  	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10581  				cs->jobs_cnt);
10582  
10583  	return 0;
10584  }
10585  
10586  static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10587  {
10588  	return HL_INVALID_QUEUE;
10589  }
10590  
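/*
 * gaudi2_gen_signal_cb() - append a single MSG_SHORT packet to the CB that adds
 * 1 to the given sync object (SOB base, ADD mode), with the engine barrier set
 * per the 'eb' argument. Returns the new CB size.
 */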
10591  static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10592  {
10593  	struct hl_cb *cb = data;
10594  	struct packet_msg_short *pkt;
10595  	u32 value, ctl, pkt_size = sizeof(*pkt);
10596  
10597  	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10598  	memset(pkt, 0, pkt_size);
10599  
10600  	/* Inc by 1, Mode ADD */
10601  	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10602  	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10603  
10604  	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10605  	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10606  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10607  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10608  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10609  
10610  	pkt->value = cpu_to_le32(value);
10611  	pkt->ctl = cpu_to_le32(ctl);
10612  
10613  	return size + pkt_size;
10614  }
10615  
10616  static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10617  {
10618  	u32 ctl, pkt_size = sizeof(*pkt);
10619  
10620  	memset(pkt, 0, pkt_size);
10621  
10622  	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10623  	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10624  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10625  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10626  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10627  
10628  	pkt->value = cpu_to_le32(value);
10629  	pkt->ctl = cpu_to_le32(ctl);
10630  
10631  	return pkt_size;
10632  }
10633  
10634  static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10635  					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10636  {
10637  	u32 ctl, value, pkt_size = sizeof(*pkt);
10638  	u8 mask;
10639  
10640  	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10641  		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10642  		return 0;
10643  	}
10644  
10645  	memset(pkt, 0, pkt_size);
10646  
10647  	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10648  	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10649  	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10650  	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10651  
10652  	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10653  	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10654  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10655  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10656  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10657  
10658  	pkt->value = cpu_to_le32(value);
10659  	pkt->ctl = cpu_to_le32(ctl);
10660  
10661  	return pkt_size;
10662  }
10663  
10664  static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10665  {
10666  	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10667  
10668  	memset(pkt, 0, pkt_size);
10669  
10670  	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10671  	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10672  	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10673  
10674  	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10675  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10676  	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10677  
10678  	pkt->cfg = cpu_to_le32(cfg);
10679  	pkt->ctl = cpu_to_le32(ctl);
10680  
10681  	return pkt_size;
10682  }
10683  
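/*
 * gaudi2_gen_wait_cb() - build a wait CB out of four MSG_SHORT packets that
 * configure a monitor (low and high halves of the fence address, a payload of
 * 1, and the ARM register that binds the monitor to the SOB range) followed by
 * a FENCE packet that blocks the queue until the payload arrives on fence ID 2.
 */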
10684  static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10685  {
10686  	struct hl_cb *cb = prop->data;
10687  	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10688  
10689  	u64 monitor_base, fence_addr = 0;
10690  	u32 stream_index, size = prop->size;
10691  	u16 msg_addr_offset;
10692  
10693  	stream_index = prop->q_idx % 4;
10694  	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10695  			QM_FENCE2_OFFSET + stream_index * 4;
10696  
10697  	/*
10698  	 * monitor_base should be the content of the base0 address registers,
10699  	 * so it will be added to the msg short offsets
10700  	 */
10701  	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10702  
10703  	/* First monitor config packet: low address of the sync */
10704  	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10705  				monitor_base;
10706  
10707  	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10708  
10709  	/* Second monitor config packet: high address of the sync */
10710  	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10711  				monitor_base;
10712  
10713  	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10714  
10715  	/*
10716  	 * Third monitor config packet: the payload, i.e. what to write when the
10717  	 * sync triggers
10718  	 */
10719  	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10720  				monitor_base;
10721  
10722  	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10723  
10724  	/* Fourth monitor config packet: bind the monitor to a sync object */
10725  	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10726  
10727  	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10728  						prop->sob_val, msg_addr_offset);
10729  
10730  	/* Fence packet */
10731  	size += gaudi2_add_fence_pkt(buf + size);
10732  
10733  	return size;
10734  }
10735  
10736  static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10737  {
10738  	struct hl_hw_sob *hw_sob = data;
10739  
10740  	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10741  
10742  	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10743  
10744  	kref_init(&hw_sob->kref);
10745  }
10746  
10747  static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10748  {
10749  }
10750  
10751  static u64 gaudi2_get_device_time(struct hl_device *hdev)
10752  {
10753  	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10754  
10755  	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10756  }
10757  
10758  static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10759  {
10760  	return 0;
10761  }
10762  
10763  static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10764  					struct hl_cs *cs, u32 wait_queue_id,
10765  					u32 collective_engine_id, u32 encaps_signal_offset)
10766  {
10767  	return -EINVAL;
10768  }
10769  
10770  /*
10771   * hl_mmu_scramble - converts a DRAM (non power of 2) page-size aligned address
10772   *                   to a DMMU page-size (64MB) address before mapping it in
10773   *                   the MMU.
10774   * The operation is performed on both the virtual and physical addresses.
10775   * For a device with 6 HBMs the scramble is:
10776   * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10777   *
10778   * Example:
10779   * =============================================================================
10780   * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10781   * Phys address                                                     in MMU last
10782   *                                                                    HOP
10783   * =============================================================================
10784   * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10785   * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10786   * =============================================================================
10787   */
10788  static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10789  {
10790  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10791  	u32 divisor, mod_va;
10792  	u64 div_va;
10793  
10794  	/* accept any address in the DRAM address space */
10795  	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10796  									VA_HBM_SPACE_END)) {
10797  
10798  		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10799  		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10800  		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10801  			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10802  			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10803  	}
10804  
10805  	return raw_addr;
10806  }
10807  
10808  static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10809  {
10810  	struct asic_fixed_properties *prop = &hdev->asic_prop;
10811  	u32 divisor, mod_va;
10812  	u64 div_va;
10813  
10814  	/* accept any address in the DRAM address space */
10815  	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10816  									VA_HBM_SPACE_END)) {
10817  
10818  		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10819  		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10820  					PAGE_SIZE_64MB, &mod_va);
10821  
10822  		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10823  					(div_va * divisor + mod_va));
10824  	}
10825  
10826  	return scrambled_addr;
10827  }
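/*
 * Worked example of the scramble/descramble round trip (illustration only,
 * assuming 6 functional HBMs so the scramble divisor is 48MB, as in the comment
 * above gaudi2_mmu_scramble_addr): scrambling 0x9C000000 gives
 * (0x9C000000 / 48MB) * 64MB + (0x9C000000 % 48MB) = 52 * 64MB = 0xD0000000,
 * while descrambling 0xD0000000 gives (0xD0000000 / 64MB) * 48MB +
 * (0xD0000000 % 64MB) = 52 * 48MB = 0x9C000000 again.
 */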
10828  
10829  static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10830  {
10831  	u32 base = 0, dcore_id, dec_id;
10832  
10833  	if (core_id >= NUMBER_OF_DEC) {
10834  		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10835  		goto out;
10836  	}
10837  
10838  	if (core_id < 8) {
10839  		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10840  		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10841  
10842  		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10843  				dec_id * DCORE_VDEC_OFFSET;
10844  	} else {
10845  		/* PCIe Shared Decoder */
10846  		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10847  	}
10848  out:
10849  	return base;
10850  }
10851  
10852  static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10853  				u32 *block_size, u32 *block_id)
10854  {
10855  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10856  	int i;
10857  
10858  	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10859  		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10860  			*block_id = i;
10861  			if (block_size)
10862  				*block_size = gaudi2->mapped_blocks[i].size;
10863  			return 0;
10864  		}
10865  	}
10866  
10867  	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10868  
10869  	return -EINVAL;
10870  }
10871  
10872  static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10873  			u32 block_id, u32 block_size)
10874  {
10875  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10876  	u64 offset_in_bar;
10877  	u64 address;
10878  	int rc;
10879  
10880  	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10881  		dev_err(hdev->dev, "Invalid block id %u", block_id);
10882  		return -EINVAL;
10883  	}
10884  
10885  	/* we allow mapping only an entire block */
10886  	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10887  		dev_err(hdev->dev, "Invalid block size %u", block_size);
10888  		return -EINVAL;
10889  	}
10890  
10891  	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10892  
10893  	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10894  
10895  	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10896  			VM_DONTCOPY | VM_NORESERVE);
10897  
10898  	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10899  			block_size, vma->vm_page_prot);
10900  	if (rc)
10901  		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10902  
10903  	return rc;
10904  }
10905  
10906  static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10907  {
10908  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10909  
10910  	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10911  	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10912  
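	/* Signal the device CPU to enable event delivery, but only if the CPU queue is up */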
10913  	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10914  		WREG32(irq_handler_offset,
10915  			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10916  }
10917  
10918  static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10919  {
10920  	switch (mmu_id) {
10921  	case HW_CAP_DCORE0_DMMU0:
10922  		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10923  		break;
10924  	case HW_CAP_DCORE0_DMMU1:
10925  		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10926  		break;
10927  	case HW_CAP_DCORE0_DMMU2:
10928  		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10929  		break;
10930  	case HW_CAP_DCORE0_DMMU3:
10931  		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10932  		break;
10933  	case HW_CAP_DCORE1_DMMU0:
10934  		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10935  		break;
10936  	case HW_CAP_DCORE1_DMMU1:
10937  		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10938  		break;
10939  	case HW_CAP_DCORE1_DMMU2:
10940  		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10941  		break;
10942  	case HW_CAP_DCORE1_DMMU3:
10943  		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10944  		break;
10945  	case HW_CAP_DCORE2_DMMU0:
10946  		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10947  		break;
10948  	case HW_CAP_DCORE2_DMMU1:
10949  		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10950  		break;
10951  	case HW_CAP_DCORE2_DMMU2:
10952  		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10953  		break;
10954  	case HW_CAP_DCORE2_DMMU3:
10955  		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10956  		break;
10957  	case HW_CAP_DCORE3_DMMU0:
10958  		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10959  		break;
10960  	case HW_CAP_DCORE3_DMMU1:
10961  		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10962  		break;
10963  	case HW_CAP_DCORE3_DMMU2:
10964  		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10965  		break;
10966  	case HW_CAP_DCORE3_DMMU3:
10967  		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10968  		break;
10969  	case HW_CAP_PMMU:
10970  		*mmu_base = mmPMMU_HBW_MMU_BASE;
10971  		break;
10972  	default:
10973  		return -EINVAL;
10974  	}
10975  
10976  	return 0;
10977  }
10978  
10979  static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10980  {
10981  	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10982  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10983  	u32 mmu_base;
10984  
10985  	if (!(gaudi2->hw_cap_initialized & mmu_id))
10986  		return;
10987  
10988  	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10989  		return;
10990  
10991  	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10992  	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10993  }
10994  
10995  static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10996  {
10997  	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10998  
10999  	/* check all HMMUs - their capability bits are contiguous, starting at HW_CAP_DCORE0_DMMU0 */
11000  	for (i = 0 ; i < num_of_hmmus ; i++) {
11001  		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11002  
11003  		if (mmu_cap_mask & mmu_id)
11004  			gaudi2_ack_mmu_error(hdev, mmu_id);
11005  	}
11006  
11007  	/* check PMMU */
11008  	if (mmu_cap_mask & HW_CAP_PMMU)
11009  		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11010  
11011  	return 0;
11012  }
11013  
11014  static void gaudi2_get_msi_info(__le32 *table)
11015  {
11016  	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11017  }
11018  
11019  static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11020  {
11021  	switch (pll_idx) {
11022  	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11023  	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11024  	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11025  	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11026  	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11027  	case HL_GAUDI2_MME_PLL: return MME_PLL;
11028  	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11029  	case HL_GAUDI2_IF_PLL: return IF_PLL;
11030  	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11031  	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11032  	case HL_GAUDI2_VID_PLL: return VID_PLL;
11033  	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11034  	default: return -EINVAL;
11035  	}
11036  }
11037  
11038  static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
11039  {
11040  	/* Not implemented */
11041  	return 0;
11042  }
11043  
11044  static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
11045  {
11046  	/* Not implemented */
11047  	return 0;
11048  }
11049  
11050  static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11051  				struct hl_device *hdev, struct hl_mon_state_dump *mon)
11052  {
11053  	/* Not implemented */
11054  	return 0;
11055  }
11056  
11057  
11058  static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11059  				u64 status_base_offset, enum hl_sync_engine_type engine_type,
11060  				u32 engine_id, char **buf, size_t *size, size_t *offset)
11061  {
11062  	/* Not implemented */
11063  	return 0;
11064  }
11065  
11066  
11067  static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11068  	.monitor_valid = gaudi2_monitor_valid,
11069  	.print_single_monitor = gaudi2_print_single_monitor,
11070  	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11071  	.print_fences_single_engine = gaudi2_print_fences_single_engine,
11072  };
11073  
11074  static void gaudi2_state_dump_init(struct hl_device *hdev)
11075  {
11076  	/* Not implemented */
11077  	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11078  	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11079  }
11080  
11081  static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11082  {
11083  	return 0;
11084  }
11085  
11086  static u32 *gaudi2_get_stream_master_qid_arr(void)
11087  {
11088  	return NULL;
11089  }
11090  
11091  static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11092  				struct attribute_group *dev_vrm_attr_grp)
11093  {
11094  	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11095  	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11096  }
11097  
11098  static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11099  					u32 page_size, u32 *real_page_size, bool is_dram_addr)
11100  {
11101  	struct asic_fixed_properties *prop = &hdev->asic_prop;
11102  
11103  	/* for host pages the page size must be a multiple of the MMU page size */
11104  	if (!is_dram_addr) {
11105  		if (page_size % mmu_prop->page_size)
11106  			goto page_size_err;
11107  
11108  		*real_page_size = mmu_prop->page_size;
11109  		return 0;
11110  	}
11111  
11112  	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11113  		goto page_size_err;
11114  
11115  	/*
11116  	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page
11117  	 * is greater than the DRAM page size).
11118  	 * For this reason, work with the DRAM page size and let the MMU scrambling routine
11119  	 * handle this mismatch when calculating the address to place in the MMU page table
11120  	 * (the check above already ensures that dram_page_size is not greater than the
11121  	 * MMU page size).
11122  	 */
11123  	*real_page_size = prop->dram_page_size;
11124  
11125  	return 0;
11126  
11127  page_size_err:
11128  	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11129  							page_size, mmu_prop->page_size >> 10);
11130  	return -EFAULT;
11131  }
11132  
11133  static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11134  {
11135  	return -EOPNOTSUPP;
11136  }
11137  
11138  int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11139  {
11140  	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11141  
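	/* The activity notification goes through the FW, so skip it if the CPU queue is down */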
11142  	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11143  		return 0;
11144  
11145  	return hl_fw_send_device_activity(hdev, open);
11146  }
11147  
11148  static const struct hl_asic_funcs gaudi2_funcs = {
11149  	.early_init = gaudi2_early_init,
11150  	.early_fini = gaudi2_early_fini,
11151  	.late_init = gaudi2_late_init,
11152  	.late_fini = gaudi2_late_fini,
11153  	.sw_init = gaudi2_sw_init,
11154  	.sw_fini = gaudi2_sw_fini,
11155  	.hw_init = gaudi2_hw_init,
11156  	.hw_fini = gaudi2_hw_fini,
11157  	.halt_engines = gaudi2_halt_engines,
11158  	.suspend = gaudi2_suspend,
11159  	.resume = gaudi2_resume,
11160  	.mmap = gaudi2_mmap,
11161  	.ring_doorbell = gaudi2_ring_doorbell,
11162  	.pqe_write = gaudi2_pqe_write,
11163  	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11164  	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
11165  	.scrub_device_mem = gaudi2_scrub_device_mem,
11166  	.scrub_device_dram = gaudi2_scrub_device_dram,
11167  	.get_int_queue_base = NULL,
11168  	.test_queues = gaudi2_test_queues,
11169  	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11170  	.asic_dma_pool_free = gaudi2_dma_pool_free,
11171  	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11172  	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11173  	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
11174  	.asic_dma_map_single = gaudi2_dma_map_single,
11175  	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
11176  	.cs_parser = gaudi2_cs_parser,
11177  	.asic_dma_map_sgtable = hl_dma_map_sgtable,
11178  	.add_end_of_cb_packets = NULL,
11179  	.update_eq_ci = gaudi2_update_eq_ci,
11180  	.context_switch = gaudi2_context_switch,
11181  	.restore_phase_topology = gaudi2_restore_phase_topology,
11182  	.debugfs_read_dma = gaudi2_debugfs_read_dma,
11183  	.add_device_attr = gaudi2_add_device_attr,
11184  	.handle_eqe = gaudi2_handle_eqe,
11185  	.get_events_stat = gaudi2_get_events_stat,
11186  	.read_pte = NULL,
11187  	.write_pte = NULL,
11188  	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11189  	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11190  	.mmu_prefetch_cache_range = NULL,
11191  	.send_heartbeat = gaudi2_send_heartbeat,
11192  	.debug_coresight = gaudi2_debug_coresight,
11193  	.is_device_idle = gaudi2_is_device_idle,
11194  	.compute_reset_late_init = gaudi2_compute_reset_late_init,
11195  	.hw_queues_lock = gaudi2_hw_queues_lock,
11196  	.hw_queues_unlock = gaudi2_hw_queues_unlock,
11197  	.get_pci_id = gaudi2_get_pci_id,
11198  	.get_eeprom_data = gaudi2_get_eeprom_data,
11199  	.get_monitor_dump = gaudi2_get_monitor_dump,
11200  	.send_cpu_message = gaudi2_send_cpu_message,
11201  	.pci_bars_map = gaudi2_pci_bars_map,
11202  	.init_iatu = gaudi2_init_iatu,
11203  	.rreg = hl_rreg,
11204  	.wreg = hl_wreg,
11205  	.halt_coresight = gaudi2_halt_coresight,
11206  	.ctx_init = gaudi2_ctx_init,
11207  	.ctx_fini = gaudi2_ctx_fini,
11208  	.pre_schedule_cs = gaudi2_pre_schedule_cs,
11209  	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11210  	.load_firmware_to_device = NULL,
11211  	.load_boot_fit_to_device = NULL,
11212  	.get_signal_cb_size = gaudi2_get_signal_cb_size,
11213  	.get_wait_cb_size = gaudi2_get_wait_cb_size,
11214  	.gen_signal_cb = gaudi2_gen_signal_cb,
11215  	.gen_wait_cb = gaudi2_gen_wait_cb,
11216  	.reset_sob = gaudi2_reset_sob,
11217  	.reset_sob_group = gaudi2_reset_sob_group,
11218  	.get_device_time = gaudi2_get_device_time,
11219  	.pb_print_security_errors = gaudi2_pb_print_security_errors,
11220  	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11221  	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11222  	.get_dec_base_addr = gaudi2_get_dec_base_addr,
11223  	.scramble_addr = gaudi2_mmu_scramble_addr,
11224  	.descramble_addr = gaudi2_mmu_descramble_addr,
11225  	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11226  	.get_hw_block_id = gaudi2_get_hw_block_id,
11227  	.hw_block_mmap = gaudi2_block_mmap,
11228  	.enable_events_from_fw = gaudi2_enable_events_from_fw,
11229  	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11230  	.get_msi_info = gaudi2_get_msi_info,
11231  	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11232  	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11233  	.init_firmware_loader = gaudi2_init_firmware_loader,
11234  	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11235  	.state_dump_init = gaudi2_state_dump_init,
11236  	.get_sob_addr = &gaudi2_get_sob_addr,
11237  	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11238  	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11239  	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11240  	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11241  	.access_dev_mem = hl_access_dev_mem,
11242  	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
11243  	.set_engine_cores = gaudi2_set_engine_cores,
11244  	.set_engines = gaudi2_set_engines,
11245  	.send_device_activity = gaudi2_send_device_activity,
11246  	.set_dram_properties = gaudi2_set_dram_properties,
11247  	.set_binning_masks = gaudi2_set_binning_masks,
11248  };
11249  
11250  void gaudi2_set_asic_funcs(struct hl_device *hdev)
11251  {
11252  	hdev->asic_funcs = &gaudi2_funcs;
11253  }
11254