xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision f519cd13)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
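/*
 * Arcturus-specific TCP_CHAN_STEER register offsets.  They are defined
 * locally here (presumably because the gc_9_0 headers included above do not
 * export them) and are programmed through golden_settings_gc_9_4_1_arct
 * below.
 */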
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
134 enum ta_ras_gfx_subblock {
135 	/*CPC*/
136 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPC_UCODE,
139 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	/* CPF*/
147 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 	TA_RAS_BLOCK__GFX_CPF_TAG,
151 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 	/* CPG*/
153 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 	TA_RAS_BLOCK__GFX_CPG_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 	/* GDS*/
159 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	/* SPI*/
167 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 	/* SQ*/
169 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 	/* SQC (3 ranges)*/
176 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 	/* SQC range 0*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	/* SQC range 1*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	/* SQC range 2*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 	/* TA*/
219 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	/* TCA*/
227 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	/* TCC (5 sub-ranges)*/
232 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 	/* TCC range 0*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	/* TCC range 1*/
245 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	/* TCC range 2*/
251 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	/* TCC range 3*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	/* TCC range 4*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 	/* TCI*/
277 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 	/* TCP*/
279 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	/* TD*/
289 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	/* EA (3 sub-ranges)*/
295 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 	/* EA range 0*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	/* EA range 1*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	/* EA range 2*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 	/* UTC VM L2 bank*/
326 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 	/* UTC VM walker*/
328 	TA_RAS_BLOCK__UTC_VML2_WALKER,
329 	/* UTC ATC L2 2MB cache*/
330 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 	/* UTC ATC L2 4KB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 	TA_RAS_BLOCK__GFX_MAX
334 };
335 
336 struct ras_gfx_subblock {
337 	unsigned char *name;
338 	int ta_subblock;
339 	int hw_supported_error_type;
340 	int sw_supported_error_type;
341 };
342 
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345 		#subblock,                                                     \
346 		TA_RAS_BLOCK__##subblock,                                      \
347 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349 	}
350 
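/*
 * Each entry below packs its eight flag arguments into two bitfields via the
 * AMDGPU_RAS_SUB_BLOCK macro above: a..d become bits 0..3 of
 * hw_supported_error_type, while g, e, h, f become bits 0..3 of
 * sw_supported_error_type.  The bit positions appear to correspond to the
 * RAS error-type flags defined in amdgpu_ras.h.
 */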
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369 			     0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 			     0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382 			     0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 			     0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 			     0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392 			     1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 			     0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436 			     1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457 			     0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 			     0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464 			     0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500 
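/*
 * "Golden" register settings.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg,
 * and_mask, or_value) entry asks soc15_program_register_sequence() (soc15.c)
 * to rewrite only the masked bits of the register, roughly
 *     reg = (reg & ~and_mask) | or_value;
 * The per-ASIC tables below are applied from gfx_v9_0_init_golden_registers().
 */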
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
523 };
524 
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546 
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561 
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
588 };
589 
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600 
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623 
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646 
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666 
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
682 };
683 
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
696 };
697 
698 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
699 {
700 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708 };
709 
710 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
711 {
712 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720 };
721 
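/*
 * Expected GB_ADDR_CONFIG value per ASIC; each matches the or-value
 * programmed for mmGB_ADDR_CONFIG in the corresponding golden table above.
 */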
722 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
723 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
724 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
725 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
726 
727 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
729 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
731 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
732                                  struct amdgpu_cu_info *cu_info);
733 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
734 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
735 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
736 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
737 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
738 					  void *ras_error_status);
739 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
740 				     void *inject_if);
741 
742 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
743 				uint64_t queue_mask)
744 {
745 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
746 	amdgpu_ring_write(kiq_ring,
747 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
748 		/* vmid_mask:0, queue_type:0 (KIQ) */
749 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
750 	amdgpu_ring_write(kiq_ring,
751 			lower_32_bits(queue_mask));	/* queue mask lo */
752 	amdgpu_ring_write(kiq_ring,
753 			upper_32_bits(queue_mask));	/* queue mask hi */
754 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
755 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
756 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
757 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
758 }
759 
760 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
761 				 struct amdgpu_ring *ring)
762 {
763 	struct amdgpu_device *adev = kiq_ring->adev;
764 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
765 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
766 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
767 
768 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
769 	/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
770 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
771 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
772 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
773 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
774 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
775 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
776 			 /*queue_type: normal compute queue */
777 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
778 			 /* alloc format: all_on_one_pipe */
779 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
780 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
781 			 /* num_queues: must be 1 */
782 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
783 	amdgpu_ring_write(kiq_ring,
784 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
785 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
786 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
787 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
788 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
789 }
790 
791 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
792 				   struct amdgpu_ring *ring,
793 				   enum amdgpu_unmap_queues_action action,
794 				   u64 gpu_addr, u64 seq)
795 {
796 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
797 
798 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
799 	amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
800 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
801 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
802 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
803 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
804 	amdgpu_ring_write(kiq_ring,
805 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
806 
807 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
808 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
809 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
810 		amdgpu_ring_write(kiq_ring, seq);
811 	} else {
812 		amdgpu_ring_write(kiq_ring, 0);
813 		amdgpu_ring_write(kiq_ring, 0);
814 		amdgpu_ring_write(kiq_ring, 0);
815 	}
816 }
817 
818 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
819 				   struct amdgpu_ring *ring,
820 				   u64 addr,
821 				   u64 seq)
822 {
823 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
824 
825 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
826 	amdgpu_ring_write(kiq_ring,
827 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
828 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
829 			  PACKET3_QUERY_STATUS_COMMAND(2));
830 	/* doorbell offset and engine select */
831 	amdgpu_ring_write(kiq_ring,
832 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
833 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
834 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
835 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
836 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
837 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
838 }
839 
840 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
841 				uint16_t pasid, uint32_t flush_type,
842 				bool all_hub)
843 {
844 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
845 	amdgpu_ring_write(kiq_ring,
846 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
847 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
848 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
849 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
850 }
851 
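/*
 * The *_size fields below give the length in dwords of each packet emitted by
 * the helpers above; the common KIQ code uses them to reserve ring space
 * before building the packets.
 */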
852 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
853 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
854 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
855 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
856 	.kiq_query_status = gfx_v9_0_kiq_query_status,
857 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
858 	.set_resources_size = 8,
859 	.map_queues_size = 7,
860 	.unmap_queues_size = 6,
861 	.query_status_size = 7,
862 	.invalidate_tlbs_size = 2,
863 };
864 
865 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
866 {
867 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
868 }
869 
870 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
871 {
872 	switch (adev->asic_type) {
873 	case CHIP_VEGA10:
874 		soc15_program_register_sequence(adev,
875 						golden_settings_gc_9_0,
876 						ARRAY_SIZE(golden_settings_gc_9_0));
877 		soc15_program_register_sequence(adev,
878 						golden_settings_gc_9_0_vg10,
879 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
880 		break;
881 	case CHIP_VEGA12:
882 		soc15_program_register_sequence(adev,
883 						golden_settings_gc_9_2_1,
884 						ARRAY_SIZE(golden_settings_gc_9_2_1));
885 		soc15_program_register_sequence(adev,
886 						golden_settings_gc_9_2_1_vg12,
887 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
888 		break;
889 	case CHIP_VEGA20:
890 		soc15_program_register_sequence(adev,
891 						golden_settings_gc_9_0,
892 						ARRAY_SIZE(golden_settings_gc_9_0));
893 		soc15_program_register_sequence(adev,
894 						golden_settings_gc_9_0_vg20,
895 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
896 		break;
897 	case CHIP_ARCTURUS:
898 		soc15_program_register_sequence(adev,
899 						golden_settings_gc_9_4_1_arct,
900 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
901 		break;
902 	case CHIP_RAVEN:
903 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
904 						ARRAY_SIZE(golden_settings_gc_9_1));
905 		if (adev->rev_id >= 8)
906 			soc15_program_register_sequence(adev,
907 							golden_settings_gc_9_1_rv2,
908 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
909 		else
910 			soc15_program_register_sequence(adev,
911 							golden_settings_gc_9_1_rv1,
912 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
913 		break;
914 	case CHIP_RENOIR:
915 		soc15_program_register_sequence(adev,
916 						golden_settings_gc_9_1_rn,
917 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
918 		return; /* Renoir does not need the common golden settings */
919 	default:
920 		break;
921 	}
922 
923 	if (adev->asic_type != CHIP_ARCTURUS)
924 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
925 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
926 }
927 
928 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
929 {
930 	adev->gfx.scratch.num_reg = 8;
931 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
932 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
933 }
934 
935 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
936 				       bool wc, uint32_t reg, uint32_t val)
937 {
938 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
939 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
940 				WRITE_DATA_DST_SEL(0) |
941 				(wc ? WR_CONFIRM : 0));
942 	amdgpu_ring_write(ring, reg);
943 	amdgpu_ring_write(ring, 0);
944 	amdgpu_ring_write(ring, val);
945 }
946 
947 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
948 				  int mem_space, int opt, uint32_t addr0,
949 				  uint32_t addr1, uint32_t ref, uint32_t mask,
950 				  uint32_t inv)
951 {
952 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
953 	amdgpu_ring_write(ring,
954 				 /* memory (1) or register (0) */
955 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
956 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
957 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
958 				 WAIT_REG_MEM_ENGINE(eng_sel)));
959 
960 	if (mem_space)
961 		BUG_ON(addr0 & 0x3); /* Dword align */
962 	amdgpu_ring_write(ring, addr0);
963 	amdgpu_ring_write(ring, addr1);
964 	amdgpu_ring_write(ring, ref);
965 	amdgpu_ring_write(ring, mask);
966 	amdgpu_ring_write(ring, inv); /* poll interval */
967 }
968 
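/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, and poll until the
 * new value shows up or the usec timeout expires.
 */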
969 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
970 {
971 	struct amdgpu_device *adev = ring->adev;
972 	uint32_t scratch;
973 	uint32_t tmp = 0;
974 	unsigned i;
975 	int r;
976 
977 	r = amdgpu_gfx_scratch_get(adev, &scratch);
978 	if (r)
979 		return r;
980 
981 	WREG32(scratch, 0xCAFEDEAD);
982 	r = amdgpu_ring_alloc(ring, 3);
983 	if (r)
984 		goto error_free_scratch;
985 
986 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
987 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
988 	amdgpu_ring_write(ring, 0xDEADBEEF);
989 	amdgpu_ring_commit(ring);
990 
991 	for (i = 0; i < adev->usec_timeout; i++) {
992 		tmp = RREG32(scratch);
993 		if (tmp == 0xDEADBEEF)
994 			break;
995 		udelay(1);
996 	}
997 
998 	if (i >= adev->usec_timeout)
999 		r = -ETIMEDOUT;
1000 
1001 error_free_scratch:
1002 	amdgpu_gfx_scratch_free(adev, scratch);
1003 	return r;
1004 }
1005 
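/*
 * Indirect-buffer test: reserve a writeback slot, submit a small IB that
 * WRITE_DATAs 0xDEADBEEF into it, wait for the fence, and verify that the
 * value actually landed in memory.
 */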
1006 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1007 {
1008 	struct amdgpu_device *adev = ring->adev;
1009 	struct amdgpu_ib ib;
1010 	struct dma_fence *f = NULL;
1011 
1012 	unsigned index;
1013 	uint64_t gpu_addr;
1014 	uint32_t tmp;
1015 	long r;
1016 
1017 	r = amdgpu_device_wb_get(adev, &index);
1018 	if (r)
1019 		return r;
1020 
1021 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1022 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1023 	memset(&ib, 0, sizeof(ib));
1024 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
1025 	if (r)
1026 		goto err1;
1027 
1028 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1029 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1030 	ib.ptr[2] = lower_32_bits(gpu_addr);
1031 	ib.ptr[3] = upper_32_bits(gpu_addr);
1032 	ib.ptr[4] = 0xDEADBEEF;
1033 	ib.length_dw = 5;
1034 
1035 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1036 	if (r)
1037 		goto err2;
1038 
1039 	r = dma_fence_wait_timeout(f, false, timeout);
1040 	if (r == 0) {
1041 		r = -ETIMEDOUT;
1042 		goto err2;
1043 	} else if (r < 0) {
1044 		goto err2;
1045 	}
1046 
1047 	tmp = adev->wb.wb[index];
1048 	if (tmp == 0xDEADBEEF)
1049 		r = 0;
1050 	else
1051 		r = -EINVAL;
1052 
1053 err2:
1054 	amdgpu_ib_free(adev, &ib, NULL);
1055 	dma_fence_put(f);
1056 err1:
1057 	amdgpu_device_wb_free(adev, index);
1058 	return r;
1059 }
1060 
1061 
1062 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1063 {
1064 	release_firmware(adev->gfx.pfp_fw);
1065 	adev->gfx.pfp_fw = NULL;
1066 	release_firmware(adev->gfx.me_fw);
1067 	adev->gfx.me_fw = NULL;
1068 	release_firmware(adev->gfx.ce_fw);
1069 	adev->gfx.ce_fw = NULL;
1070 	release_firmware(adev->gfx.rlc_fw);
1071 	adev->gfx.rlc_fw = NULL;
1072 	release_firmware(adev->gfx.mec_fw);
1073 	adev->gfx.mec_fw = NULL;
1074 	release_firmware(adev->gfx.mec2_fw);
1075 	adev->gfx.mec2_fw = NULL;
1076 
1077 	kfree(adev->gfx.rlc.register_list_format);
1078 }
1079 
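/*
 * Parse the v2.1 RLC firmware header and record the versions, sizes and
 * locations of the three save/restore lists (cntl, gpm, srm) carried in
 * the image.
 */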
1080 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1081 {
1082 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1083 
1084 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1085 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1086 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1087 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1088 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1089 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1090 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1091 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1092 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1093 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1094 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1095 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1096 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1097 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1098 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1099 }
1100 
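/*
 * me_fw_write_wait/mec_fw_write_wait record whether the CP firmware is new
 * enough for a combined register write-and-wait; older firmware forces the
 * ring code later in this file to emit separate write and wait packets
 * instead.
 */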
1101 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1102 {
1103 	adev->gfx.me_fw_write_wait = false;
1104 	adev->gfx.mec_fw_write_wait = false;
1105 
1106 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1107 	    (adev->gfx.mec_feature_version < 46) ||
1108 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1109 	    (adev->gfx.pfp_feature_version < 46))
1110 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1111 
1112 	switch (adev->asic_type) {
1113 	case CHIP_VEGA10:
1114 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1115 		    (adev->gfx.me_feature_version >= 42) &&
1116 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1117 		    (adev->gfx.pfp_feature_version >= 42))
1118 			adev->gfx.me_fw_write_wait = true;
1119 
1120 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1121 		    (adev->gfx.mec_feature_version >= 42))
1122 			adev->gfx.mec_fw_write_wait = true;
1123 		break;
1124 	case CHIP_VEGA12:
1125 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1126 		    (adev->gfx.me_feature_version >= 44) &&
1127 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1128 		    (adev->gfx.pfp_feature_version >= 44))
1129 			adev->gfx.me_fw_write_wait = true;
1130 
1131 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1132 		    (adev->gfx.mec_feature_version >= 44))
1133 			adev->gfx.mec_fw_write_wait = true;
1134 		break;
1135 	case CHIP_VEGA20:
1136 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1137 		    (adev->gfx.me_feature_version >= 44) &&
1138 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1139 		    (adev->gfx.pfp_feature_version >= 44))
1140 			adev->gfx.me_fw_write_wait = true;
1141 
1142 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1143 		    (adev->gfx.mec_feature_version >= 44))
1144 			adev->gfx.mec_fw_write_wait = true;
1145 		break;
1146 	case CHIP_RAVEN:
1147 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1148 		    (adev->gfx.me_feature_version >= 42) &&
1149 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1150 		    (adev->gfx.pfp_feature_version >= 42))
1151 			adev->gfx.me_fw_write_wait = true;
1152 
1153 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1154 		    (adev->gfx.mec_feature_version >= 42))
1155 			adev->gfx.mec_fw_write_wait = true;
1156 		break;
1157 	default:
1158 		break;
1159 	}
1160 }
1161 
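/*
 * GFXOFF is only left enabled on Raven when the part is Raven2 (rev_id >= 0x8)
 * or Picasso (PCI device 0x15d8), or when both a new enough SMU firmware
 * (>= 0x41e2b) and an RLC with the v2.1 save/restore lists are present; if it
 * stays enabled, the matching GFX powergating flags are set as well.
 */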
1162 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1163 {
1164 	switch (adev->asic_type) {
1165 	case CHIP_VEGA10:
1166 	case CHIP_VEGA12:
1167 	case CHIP_VEGA20:
1168 		break;
1169 	case CHIP_RAVEN:
1170 		if (!(adev->rev_id >= 0x8 ||
1171 		      adev->pdev->device == 0x15d8) &&
1172 		    (adev->pm.fw_version < 0x41e2b || /* not Raven1 refresh SMU fw */
1173 		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1174 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1175 
1176 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1177 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1178 				AMD_PG_SUPPORT_CP |
1179 				AMD_PG_SUPPORT_RLC_SMU_HS;
1180 		break;
1181 	case CHIP_RENOIR:
1182 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1183 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1184 				AMD_PG_SUPPORT_CP |
1185 				AMD_PG_SUPPORT_RLC_SMU_HS;
1186 		break;
1187 	default:
1188 		break;
1189 	}
1190 }
1191 
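/*
 * Fetch and validate the PFP, ME and CE images (amdgpu/<chip>_{pfp,me,ce}.bin),
 * record their ucode/feature versions, and register them for front-door
 * loading when the PSP load type is in use.
 */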
1192 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1193 					  const char *chip_name)
1194 {
1195 	char fw_name[30];
1196 	int err;
1197 	struct amdgpu_firmware_info *info = NULL;
1198 	const struct common_firmware_header *header = NULL;
1199 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1200 
1201 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1202 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1203 	if (err)
1204 		goto out;
1205 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1206 	if (err)
1207 		goto out;
1208 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1209 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1210 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1211 
1212 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1213 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1214 	if (err)
1215 		goto out;
1216 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1217 	if (err)
1218 		goto out;
1219 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1220 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1221 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1222 
1223 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1224 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1225 	if (err)
1226 		goto out;
1227 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1228 	if (err)
1229 		goto out;
1230 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1231 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1232 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1233 
1234 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1235 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1236 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1237 		info->fw = adev->gfx.pfp_fw;
1238 		header = (const struct common_firmware_header *)info->fw->data;
1239 		adev->firmware.fw_size +=
1240 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1241 
1242 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1243 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1244 		info->fw = adev->gfx.me_fw;
1245 		header = (const struct common_firmware_header *)info->fw->data;
1246 		adev->firmware.fw_size +=
1247 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1248 
1249 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1250 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1251 		info->fw = adev->gfx.ce_fw;
1252 		header = (const struct common_firmware_header *)info->fw->data;
1253 		adev->firmware.fw_size +=
1254 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1255 	}
1256 
1257 out:
1258 	if (err) {
1259 		dev_err(adev->dev,
1260 			"gfx9: Failed to load firmware \"%s\"\n",
1261 			fw_name);
1262 		release_firmware(adev->gfx.pfp_fw);
1263 		adev->gfx.pfp_fw = NULL;
1264 		release_firmware(adev->gfx.me_fw);
1265 		adev->gfx.me_fw = NULL;
1266 		release_firmware(adev->gfx.ce_fw);
1267 		adev->gfx.ce_fw = NULL;
1268 	}
1269 	return err;
1270 }
1271 
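/*
 * Load the RLC firmware.  The image name depends on the board: Picasso AM4
 * boards use the _rlc_am4 image, Raven with a new enough SMU uses the
 * _kicker_rlc image, everything else uses the plain _rlc image.  The v2.0
 * header fields describing the save/restore register lists are copied into
 * adev->gfx.rlc for later use by the save/restore machine setup.
 */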
1272 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1273 					  const char *chip_name)
1274 {
1275 	char fw_name[30];
1276 	int err;
1277 	struct amdgpu_firmware_info *info = NULL;
1278 	const struct common_firmware_header *header = NULL;
1279 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1280 	unsigned int *tmp = NULL;
1281 	unsigned int i = 0;
1282 	uint16_t version_major;
1283 	uint16_t version_minor;
1284 	uint32_t smu_version;
1285 
1286 	/*
1287 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1288 	 * instead of picasso_rlc.bin.
1289 	 * How to tell them apart:
1290 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1291 	 *          or revision >= 0xD8 && revision <= 0xDF
1292 	 * otherwise it is PCO FP5
1293 	 */
1294 	if (!strcmp(chip_name, "picasso") &&
1295 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1296 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1297 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1298 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1299 		(smu_version >= 0x41e2b))
1300 		/*
1301 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1302 		 */
1303 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1304 	else
1305 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1306 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1307 	if (err)
1308 		goto out;
1309 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1310 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1311 
1312 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1313 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1314 	if (version_major == 2 && version_minor == 1)
1315 		adev->gfx.rlc.is_rlc_v2_1 = true;
1316 
1317 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1318 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1319 	adev->gfx.rlc.save_and_restore_offset =
1320 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1321 	adev->gfx.rlc.clear_state_descriptor_offset =
1322 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1323 	adev->gfx.rlc.avail_scratch_ram_locations =
1324 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1325 	adev->gfx.rlc.reg_restore_list_size =
1326 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1327 	adev->gfx.rlc.reg_list_format_start =
1328 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1329 	adev->gfx.rlc.reg_list_format_separate_start =
1330 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1331 	adev->gfx.rlc.starting_offsets_start =
1332 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1333 	adev->gfx.rlc.reg_list_format_size_bytes =
1334 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1335 	adev->gfx.rlc.reg_list_size_bytes =
1336 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1337 	adev->gfx.rlc.register_list_format =
1338 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1339 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1340 	if (!adev->gfx.rlc.register_list_format) {
1341 		err = -ENOMEM;
1342 		goto out;
1343 	}
1344 
1345 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1346 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1347 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1348 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1349 
1350 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1351 
1352 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1353 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1354 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1355 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1356 
1357 	if (adev->gfx.rlc.is_rlc_v2_1)
1358 		gfx_v9_0_init_rlc_ext_microcode(adev);
1359 
1360 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1361 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1362 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1363 		info->fw = adev->gfx.rlc_fw;
1364 		header = (const struct common_firmware_header *)info->fw->data;
1365 		adev->firmware.fw_size +=
1366 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1367 
1368 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1369 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1370 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1371 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1372 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1373 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1374 			info->fw = adev->gfx.rlc_fw;
1375 			adev->firmware.fw_size +=
1376 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1377 
1378 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1379 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1380 			info->fw = adev->gfx.rlc_fw;
1381 			adev->firmware.fw_size +=
1382 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1383 
1384 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1385 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1386 			info->fw = adev->gfx.rlc_fw;
1387 			adev->firmware.fw_size +=
1388 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1389 		}
1390 	}
1391 
1392 out:
1393 	if (err) {
1394 		dev_err(adev->dev,
1395 			"gfx9: Failed to load firmware \"%s\"\n",
1396 			fw_name);
1397 		release_firmware(adev->gfx.rlc_fw);
1398 		adev->gfx.rlc_fw = NULL;
1399 	}
1400 	return err;
1401 }
1402 
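/*
 * Fetch the MEC firmware and, if present, the optional MEC2 image.  A
 * missing MEC2 image is not an error; the driver simply runs with MEC1 only.
 * The jump-table (JT) portions are tracked as separate ucode entries when
 * firmware is loaded through the PSP.  The GFXOFF and fw write-wait checks
 * run at the end, once all firmware versions are known.
 */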
1403 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1404 					  const char *chip_name)
1405 {
1406 	char fw_name[30];
1407 	int err;
1408 	struct amdgpu_firmware_info *info = NULL;
1409 	const struct common_firmware_header *header = NULL;
1410 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1411 
1412 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1413 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1414 	if (err)
1415 		goto out;
1416 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1417 	if (err)
1418 		goto out;
1419 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1420 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1421 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1422 
1423 
1424 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1425 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1426 	if (!err) {
1427 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1428 		if (err)
1429 			goto out;
1430 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1431 		adev->gfx.mec2_fw->data;
1432 		adev->gfx.mec2_fw_version =
1433 		le32_to_cpu(cp_hdr->header.ucode_version);
1434 		adev->gfx.mec2_feature_version =
1435 		le32_to_cpu(cp_hdr->ucode_feature_version);
1436 	} else {
1437 		err = 0;
1438 		adev->gfx.mec2_fw = NULL;
1439 	}
1440 
1441 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1442 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1443 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1444 		info->fw = adev->gfx.mec_fw;
1445 		header = (const struct common_firmware_header *)info->fw->data;
1446 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1447 		adev->firmware.fw_size +=
1448 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1449 
1450 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1451 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1452 		info->fw = adev->gfx.mec_fw;
1453 		adev->firmware.fw_size +=
1454 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1455 
1456 		if (adev->gfx.mec2_fw) {
1457 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1458 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1459 			info->fw = adev->gfx.mec2_fw;
1460 			header = (const struct common_firmware_header *)info->fw->data;
1461 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1462 			adev->firmware.fw_size +=
1463 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1464 
1465 			/* TODO: Determine if MEC2 JT FW loading can be removed
1466 				 for all GFX v9 ASICs and above */
1467 			if (adev->asic_type != CHIP_ARCTURUS &&
1468 			    adev->asic_type != CHIP_RENOIR) {
1469 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1470 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1471 				info->fw = adev->gfx.mec2_fw;
1472 				adev->firmware.fw_size +=
1473 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1474 					PAGE_SIZE);
1475 			}
1476 		}
1477 	}
1478 
1479 out:
1480 	gfx_v9_0_check_if_need_gfxoff(adev);
1481 	gfx_v9_0_check_fw_write_wait(adev);
1482 	if (err) {
1483 		dev_err(adev->dev,
1484 			"gfx9: Failed to load firmware \"%s\"\n",
1485 			fw_name);
1486 		release_firmware(adev->gfx.mec_fw);
1487 		adev->gfx.mec_fw = NULL;
1488 		release_firmware(adev->gfx.mec2_fw);
1489 		adev->gfx.mec2_fw = NULL;
1490 	}
1491 	return err;
1492 }
1493 
1494 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1495 {
1496 	const char *chip_name;
1497 	int r;
1498 
1499 	DRM_DEBUG("\n");
1500 
1501 	switch (adev->asic_type) {
1502 	case CHIP_VEGA10:
1503 		chip_name = "vega10";
1504 		break;
1505 	case CHIP_VEGA12:
1506 		chip_name = "vega12";
1507 		break;
1508 	case CHIP_VEGA20:
1509 		chip_name = "vega20";
1510 		break;
1511 	case CHIP_RAVEN:
1512 		if (adev->rev_id >= 8)
1513 			chip_name = "raven2";
1514 		else if (adev->pdev->device == 0x15d8)
1515 			chip_name = "picasso";
1516 		else
1517 			chip_name = "raven";
1518 		break;
1519 	case CHIP_ARCTURUS:
1520 		chip_name = "arcturus";
1521 		break;
1522 	case CHIP_RENOIR:
1523 		chip_name = "renoir";
1524 		break;
1525 	default:
1526 		BUG();
1527 	}
1528 
1529 	/* No CPG in Arcturus */
1530 	if (adev->asic_type != CHIP_ARCTURUS) {
1531 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1532 		if (r)
1533 			return r;
1534 	}
1535 
1536 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1537 	if (r)
1538 		return r;
1539 
1540 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1541 	if (r)
1542 		return r;
1543 
1544 	return r;
1545 }
1546 
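/*
 * Return the size of the clear-state buffer in dwords: the preamble and
 * context-control packets, one SET_CONTEXT_REG packet per extent in
 * gfx9_cs_data, and the trailing preamble-end and CLEAR_STATE packets.
 */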
1547 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1548 {
1549 	u32 count = 0;
1550 	const struct cs_section_def *sect = NULL;
1551 	const struct cs_extent_def *ext = NULL;
1552 
1553 	/* begin clear state */
1554 	count += 2;
1555 	/* context control state */
1556 	count += 3;
1557 
1558 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1559 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1560 			if (sect->id == SECT_CONTEXT)
1561 				count += 2 + ext->reg_count;
1562 			else
1563 				return 0;
1564 		}
1565 	}
1566 
1567 	/* end clear state */
1568 	count += 2;
1569 	/* clear state */
1570 	count += 2;
1571 
1572 	return count;
1573 }
1574 
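/*
 * Build the clear-state indirect buffer: PM4 packets that reset the
 * SECT_CONTEXT registers listed in adev->gfx.rlc.cs_data, bracketed by
 * PREAMBLE begin/end markers and finished with a CLEAR_STATE packet.
 */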
1575 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1576 				    volatile u32 *buffer)
1577 {
1578 	u32 count = 0, i;
1579 	const struct cs_section_def *sect = NULL;
1580 	const struct cs_extent_def *ext = NULL;
1581 
1582 	if (adev->gfx.rlc.cs_data == NULL)
1583 		return;
1584 	if (buffer == NULL)
1585 		return;
1586 
1587 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1588 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1589 
1590 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1591 	buffer[count++] = cpu_to_le32(0x80000000);
1592 	buffer[count++] = cpu_to_le32(0x80000000);
1593 
1594 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1595 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1596 			if (sect->id == SECT_CONTEXT) {
1597 				buffer[count++] =
1598 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1599 				buffer[count++] = cpu_to_le32(ext->reg_index -
1600 						PACKET3_SET_CONTEXT_REG_START);
1601 				for (i = 0; i < ext->reg_count; i++)
1602 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1603 			} else {
1604 				return;
1605 			}
1606 		}
1607 	}
1608 
1609 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1610 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1611 
1612 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1613 	buffer[count++] = cpu_to_le32(0);
1614 }
1615 
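/*
 * Program the per-SE/SH "always on" CU masks used by power gating.  APUs
 * keep 4 CUs always on, Vega12 keeps 8 and the remaining dGPUs keep 12;
 * the first two enabled CUs are also written to the PG always-on mask.
 */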
1616 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1617 {
1618 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1619 	uint32_t pg_always_on_cu_num = 2;
1620 	uint32_t always_on_cu_num;
1621 	uint32_t i, j, k;
1622 	uint32_t mask, cu_bitmap, counter;
1623 
1624 	if (adev->flags & AMD_IS_APU)
1625 		always_on_cu_num = 4;
1626 	else if (adev->asic_type == CHIP_VEGA12)
1627 		always_on_cu_num = 8;
1628 	else
1629 		always_on_cu_num = 12;
1630 
1631 	mutex_lock(&adev->grbm_idx_mutex);
1632 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1633 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1634 			mask = 1;
1635 			cu_bitmap = 0;
1636 			counter = 0;
1637 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1638 
1639 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1640 				if (cu_info->bitmap[i][j] & mask) {
1641 					if (counter == pg_always_on_cu_num)
1642 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1643 					if (counter < always_on_cu_num)
1644 						cu_bitmap |= mask;
1645 					else
1646 						break;
1647 					counter++;
1648 				}
1649 				mask <<= 1;
1650 			}
1651 
1652 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1653 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1654 		}
1655 	}
1656 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1657 	mutex_unlock(&adev->grbm_idx_mutex);
1658 }
1659 
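/*
 * Set up load-balancing per watt (LBPW) for Raven: threshold and counter
 * registers, the init CU mask (broadcast to all SE/SH) and the
 * RLC_LB_CNTL/RLC_LB_PARAMS fields that control the feature.
 * gfx_v9_4_init_lbpw() below does the same for Vega20 with different
 * threshold values.
 */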
1660 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1661 {
1662 	uint32_t data;
1663 
1664 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1665 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1666 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1667 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1668 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1669 
1670 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1671 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1672 
1673 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1674 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1675 
1676 	mutex_lock(&adev->grbm_idx_mutex);
1677 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1678 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1679 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1680 
1681 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1682 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1683 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1684 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1685 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1686 
1687 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1688 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1689 	data &= 0x0000FFFF;
1690 	data |= 0x00C00000;
1691 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1692 
1693 	/*
1694 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1695 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1696 	 */
1697 
1698 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1699 	 * but used for RLC_LB_CNTL configuration */
1700 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1701 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1702 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1703 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1704 	mutex_unlock(&adev->grbm_idx_mutex);
1705 
1706 	gfx_v9_0_init_always_on_cu_mask(adev);
1707 }
1708 
1709 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1710 {
1711 	uint32_t data;
1712 
1713 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1714 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1715 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1716 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1717 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1718 
1719 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1720 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1721 
1722 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1723 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1724 
1725 	mutex_lock(&adev->grbm_idx_mutex);
1726 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1727 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1728 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1729 
1730 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1731 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1732 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1733 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1734 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1735 
1736 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1737 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1738 	data &= 0x0000FFFF;
1739 	data |= 0x00C00000;
1740 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1741 
1742 	/*
1743 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1744 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1745 	 */
1746 
1747 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1748 	 * but used for RLC_LB_CNTL configuration */
1749 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1750 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1751 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1752 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1753 	mutex_unlock(&adev->grbm_idx_mutex);
1754 
1755 	gfx_v9_0_init_always_on_cu_mask(adev);
1756 }
1757 
1758 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1759 {
1760 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1761 }
1762 
1763 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1764 {
1765 	return 5;
1766 }
1767 
1768 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1769 {
1770 	const struct cs_section_def *cs_data;
1771 	int r;
1772 
1773 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1774 
1775 	cs_data = adev->gfx.rlc.cs_data;
1776 
1777 	if (cs_data) {
1778 		/* init clear state block */
1779 		r = amdgpu_gfx_rlc_init_csb(adev);
1780 		if (r)
1781 			return r;
1782 	}
1783 
1784 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1785 		/* TODO: double check the cp_table_size for RV */
1786 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1787 		r = amdgpu_gfx_rlc_init_cpt(adev);
1788 		if (r)
1789 			return r;
1790 	}
1791 
1792 	switch (adev->asic_type) {
1793 	case CHIP_RAVEN:
1794 		gfx_v9_0_init_lbpw(adev);
1795 		break;
1796 	case CHIP_VEGA20:
1797 		gfx_v9_4_init_lbpw(adev);
1798 		break;
1799 	default:
1800 		break;
1801 	}
1802 
1803 	return 0;
1804 }
1805 
1806 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1807 {
1808 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1809 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1810 }
1811 
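/*
 * Allocate the HPD/EOP buffer in VRAM, sized by the number of compute rings
 * actually acquired, and copy the MEC microcode into a GTT buffer object so
 * the CP can fetch it.
 */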
1812 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1813 {
1814 	int r;
1815 	u32 *hpd;
1816 	const __le32 *fw_data;
1817 	unsigned fw_size;
1818 	u32 *fw;
1819 	size_t mec_hpd_size;
1820 
1821 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1822 
1823 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1824 
1825 	/* take ownership of the relevant compute queues */
1826 	amdgpu_gfx_compute_queue_acquire(adev);
1827 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1828 
1829 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1830 				      AMDGPU_GEM_DOMAIN_VRAM,
1831 				      &adev->gfx.mec.hpd_eop_obj,
1832 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1833 				      (void **)&hpd);
1834 	if (r) {
1835 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1836 		gfx_v9_0_mec_fini(adev);
1837 		return r;
1838 	}
1839 
1840 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1841 
1842 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1843 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1844 
1845 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1846 
1847 	fw_data = (const __le32 *)
1848 		(adev->gfx.mec_fw->data +
1849 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1850 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1851 
1852 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1853 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1854 				      &adev->gfx.mec.mec_fw_obj,
1855 				      &adev->gfx.mec.mec_fw_gpu_addr,
1856 				      (void **)&fw);
1857 	if (r) {
1858 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1859 		gfx_v9_0_mec_fini(adev);
1860 		return r;
1861 	}
1862 
1863 	memcpy(fw, fw_data, fw_size);
1864 
1865 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1866 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1867 
1868 	return 0;
1869 }
1870 
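/*
 * Read a wave-indexed SQ register through the SQ_IND_INDEX/SQ_IND_DATA
 * pair; wave_read_regs() below does the same with auto-increment enabled
 * so a whole range of registers (SGPRs/VGPRs) can be dumped in one go.
 */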
1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1872 {
1873 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1874 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1875 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1876 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1877 		(SQ_IND_INDEX__FORCE_READ_MASK));
1878 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1879 }
1880 
1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1882 			   uint32_t wave, uint32_t thread,
1883 			   uint32_t regno, uint32_t num, uint32_t *out)
1884 {
1885 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1886 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1887 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1888 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1889 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1890 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1891 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1892 	while (num--)
1893 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1894 }
1895 
1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1897 {
1898 	/* type 1 wave data */
1899 	dst[(*no_fields)++] = 1;
1900 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1901 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1902 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1903 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1904 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1905 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1906 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1907 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1908 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1909 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1910 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1911 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1912 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1913 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1914 }
1915 
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1917 				     uint32_t wave, uint32_t start,
1918 				     uint32_t size, uint32_t *dst)
1919 {
1920 	wave_read_regs(
1921 		adev, simd, wave, 0,
1922 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1923 }
1924 
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1926 				     uint32_t wave, uint32_t thread,
1927 				     uint32_t start, uint32_t size,
1928 				     uint32_t *dst)
1929 {
1930 	wave_read_regs(
1931 		adev, simd, wave, thread,
1932 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1933 }
1934 
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936 				  u32 me, u32 pipe, u32 q, u32 vm)
1937 {
1938 	soc15_grbm_select(adev, me, pipe, q, vm);
1939 }
1940 
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943 	.select_se_sh = &gfx_v9_0_select_se_sh,
1944 	.read_wave_data = &gfx_v9_0_read_wave_data,
1945 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1948 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1949 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1950 };
1951 
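/*
 * Per-ASIC gfx configuration: fixed FIFO sizes plus GB_ADDR_CONFIG, which
 * is either a golden value or read back from hardware and patched.  The
 * individual address-config fields are then decoded into adev->gfx.config
 * for use by the rest of the driver.
 */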
1952 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1953 {
1954 	u32 gb_addr_config;
1955 	int err;
1956 
1957 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1958 
1959 	switch (adev->asic_type) {
1960 	case CHIP_VEGA10:
1961 		adev->gfx.config.max_hw_contexts = 8;
1962 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1963 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1964 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1965 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1966 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1967 		break;
1968 	case CHIP_VEGA12:
1969 		adev->gfx.config.max_hw_contexts = 8;
1970 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1975 		DRM_INFO("fix gfx.config for vega12\n");
1976 		break;
1977 	case CHIP_VEGA20:
1978 		adev->gfx.config.max_hw_contexts = 8;
1979 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1980 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1981 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1982 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1983 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1984 		gb_addr_config &= ~0xf3e777ff;
1985 		gb_addr_config |= 0x22014042;
1986 		/* check vbios table if gpu info is not available */
1987 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1988 		if (err)
1989 			return err;
1990 		break;
1991 	case CHIP_RAVEN:
1992 		adev->gfx.config.max_hw_contexts = 8;
1993 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1994 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1995 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1996 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1997 		if (adev->rev_id >= 8)
1998 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1999 		else
2000 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2001 		break;
2002 	case CHIP_ARCTURUS:
2003 		adev->gfx.config.max_hw_contexts = 8;
2004 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2005 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2006 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2007 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2008 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2009 		gb_addr_config &= ~0xf3e777ff;
2010 		gb_addr_config |= 0x22014042;
2011 		break;
2012 	case CHIP_RENOIR:
2013 		adev->gfx.config.max_hw_contexts = 8;
2014 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2015 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2016 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2017 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2018 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2019 		gb_addr_config &= ~0xf3e777ff;
2020 		gb_addr_config |= 0x22010042;
2021 		break;
2022 	default:
2023 		BUG();
2024 		break;
2025 	}
2026 
2027 	adev->gfx.config.gb_addr_config = gb_addr_config;
2028 
2029 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2030 			REG_GET_FIELD(
2031 					adev->gfx.config.gb_addr_config,
2032 					GB_ADDR_CONFIG,
2033 					NUM_PIPES);
2034 
2035 	adev->gfx.config.max_tile_pipes =
2036 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2037 
2038 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2039 			REG_GET_FIELD(
2040 					adev->gfx.config.gb_addr_config,
2041 					GB_ADDR_CONFIG,
2042 					NUM_BANKS);
2043 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2044 			REG_GET_FIELD(
2045 					adev->gfx.config.gb_addr_config,
2046 					GB_ADDR_CONFIG,
2047 					MAX_COMPRESSED_FRAGS);
2048 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2049 			REG_GET_FIELD(
2050 					adev->gfx.config.gb_addr_config,
2051 					GB_ADDR_CONFIG,
2052 					NUM_RB_PER_SE);
2053 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2054 			REG_GET_FIELD(
2055 					adev->gfx.config.gb_addr_config,
2056 					GB_ADDR_CONFIG,
2057 					NUM_SHADER_ENGINES);
2058 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2059 			REG_GET_FIELD(
2060 					adev->gfx.config.gb_addr_config,
2061 					GB_ADDR_CONFIG,
2062 					PIPE_INTERLEAVE_SIZE));
2063 
2064 	return 0;
2065 }
2066 
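/*
 * Initialize one compute ring.  MEC0 is exposed as ME1, so ring->me is
 * mec + 1; the doorbell index is shifted left by one, presumably to convert
 * the 64-bit doorbell slot index into dword units, and each ring gets its
 * own slice of the HPD EOP buffer.
 */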
2067 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2068 				      int mec, int pipe, int queue)
2069 {
2070 	int r;
2071 	unsigned irq_type;
2072 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2075 
2076 	/* mec0 is me1 */
2077 	ring->me = mec + 1;
2078 	ring->pipe = pipe;
2079 	ring->queue = queue;
2080 
2081 	ring->ring_obj = NULL;
2082 	ring->use_doorbell = true;
2083 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2084 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2085 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2086 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2087 
2088 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2089 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2090 		+ ring->pipe;
2091 
2092 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2093 	r = amdgpu_ring_init(adev, ring, 1024,
2094 			     &adev->gfx.eop_irq, irq_type);
2095 	if (r)
2096 		return r;
2097 
2098 
2099 	return 0;
2100 }
2101 
2102 static int gfx_v9_0_sw_init(void *handle)
2103 {
2104 	int i, j, k, r, ring_id;
2105 	struct amdgpu_ring *ring;
2106 	struct amdgpu_kiq *kiq;
2107 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2108 
2109 	switch (adev->asic_type) {
2110 	case CHIP_VEGA10:
2111 	case CHIP_VEGA12:
2112 	case CHIP_VEGA20:
2113 	case CHIP_RAVEN:
2114 	case CHIP_ARCTURUS:
2115 	case CHIP_RENOIR:
2116 		adev->gfx.mec.num_mec = 2;
2117 		break;
2118 	default:
2119 		adev->gfx.mec.num_mec = 1;
2120 		break;
2121 	}
2122 
2123 	adev->gfx.mec.num_pipe_per_mec = 4;
2124 	adev->gfx.mec.num_queue_per_pipe = 8;
2125 
2126 	/* EOP Event */
2127 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2128 	if (r)
2129 		return r;
2130 
2131 	/* Privileged reg */
2132 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2133 			      &adev->gfx.priv_reg_irq);
2134 	if (r)
2135 		return r;
2136 
2137 	/* Privileged inst */
2138 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2139 			      &adev->gfx.priv_inst_irq);
2140 	if (r)
2141 		return r;
2142 
2143 	/* ECC error */
2144 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2145 			      &adev->gfx.cp_ecc_error_irq);
2146 	if (r)
2147 		return r;
2148 
2149 	/* FUE error */
2150 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2151 			      &adev->gfx.cp_ecc_error_irq);
2152 	if (r)
2153 		return r;
2154 
2155 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2156 
2157 	gfx_v9_0_scratch_init(adev);
2158 
2159 	r = gfx_v9_0_init_microcode(adev);
2160 	if (r) {
2161 		DRM_ERROR("Failed to load gfx firmware!\n");
2162 		return r;
2163 	}
2164 
2165 	r = adev->gfx.rlc.funcs->init(adev);
2166 	if (r) {
2167 		DRM_ERROR("Failed to init rlc BOs!\n");
2168 		return r;
2169 	}
2170 
2171 	r = gfx_v9_0_mec_init(adev);
2172 	if (r) {
2173 		DRM_ERROR("Failed to init MEC BOs!\n");
2174 		return r;
2175 	}
2176 
2177 	/* set up the gfx ring */
2178 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2179 		ring = &adev->gfx.gfx_ring[i];
2180 		ring->ring_obj = NULL;
2181 		if (!i)
2182 			sprintf(ring->name, "gfx");
2183 		else
2184 			sprintf(ring->name, "gfx_%d", i);
2185 		ring->use_doorbell = true;
2186 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2187 		r = amdgpu_ring_init(adev, ring, 1024,
2188 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2189 		if (r)
2190 			return r;
2191 	}
2192 
2193 	/* set up the compute queues - allocate horizontally across pipes */
2194 	ring_id = 0;
2195 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2196 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2197 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2198 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2199 					continue;
2200 
2201 				r = gfx_v9_0_compute_ring_init(adev,
2202 							       ring_id,
2203 							       i, k, j);
2204 				if (r)
2205 					return r;
2206 
2207 				ring_id++;
2208 			}
2209 		}
2210 	}
2211 
2212 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2213 	if (r) {
2214 		DRM_ERROR("Failed to init KIQ BOs!\n");
2215 		return r;
2216 	}
2217 
2218 	kiq = &adev->gfx.kiq;
2219 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2220 	if (r)
2221 		return r;
2222 
2223 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2224 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2225 	if (r)
2226 		return r;
2227 
2228 	adev->gfx.ce_ram_size = 0x8000;
2229 
2230 	r = gfx_v9_0_gpu_early_init(adev);
2231 	if (r)
2232 		return r;
2233 
2234 	return 0;
2235 }
2236 
2237 
2238 static int gfx_v9_0_sw_fini(void *handle)
2239 {
2240 	int i;
2241 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2242 
2243 	amdgpu_gfx_ras_fini(adev);
2244 
2245 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2246 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2247 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2248 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2249 
2250 	amdgpu_gfx_mqd_sw_fini(adev);
2251 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2252 	amdgpu_gfx_kiq_fini(adev);
2253 
2254 	gfx_v9_0_mec_fini(adev);
2255 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2256 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2257 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2258 				&adev->gfx.rlc.cp_table_gpu_addr,
2259 				(void **)&adev->gfx.rlc.cp_table_ptr);
2260 	}
2261 	gfx_v9_0_free_microcode(adev);
2262 
2263 	return 0;
2264 }
2265 
2266 
2267 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2268 {
2269 	/* TODO */
2270 }
2271 
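/*
 * Program GRBM_GFX_INDEX to steer register accesses at a specific shader
 * engine / shader array / instance; passing 0xffffffff for a field selects
 * broadcast mode for that field.
 */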
2272 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2273 {
2274 	u32 data;
2275 
2276 	if (instance == 0xffffffff)
2277 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2278 	else
2279 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2280 
2281 	if (se_num == 0xffffffff)
2282 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2283 	else
2284 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2285 
2286 	if (sh_num == 0xffffffff)
2287 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2288 	else
2289 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2290 
2291 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2292 }
2293 
2294 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2295 {
2296 	u32 data, mask;
2297 
2298 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2299 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2300 
2301 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2302 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2303 
2304 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2305 					 adev->gfx.config.max_sh_per_se);
2306 
2307 	return (~data) & mask;
2308 }
2309 
2310 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2311 {
2312 	int i, j;
2313 	u32 data;
2314 	u32 active_rbs = 0;
2315 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2316 					adev->gfx.config.max_sh_per_se;
2317 
2318 	mutex_lock(&adev->grbm_idx_mutex);
2319 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2320 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2321 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2322 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2323 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2324 					       rb_bitmap_width_per_sh);
2325 		}
2326 	}
2327 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2328 	mutex_unlock(&adev->grbm_idx_mutex);
2329 
2330 	adev->gfx.config.backend_enable_mask = active_rbs;
2331 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2332 }
2333 
2334 #define DEFAULT_SH_MEM_BASES	(0x6000)
2335 #define FIRST_COMPUTE_VMID	(8)
2336 #define LAST_COMPUTE_VMID	(16)
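/*
 * Give the compute VMIDs (8..15) the fixed aperture layout described in the
 * comment below via SH_MEM_CONFIG/SH_MEM_BASES and clear their GDS, GWS and
 * OA allocations; the scheduling firmware is expected to re-enable those
 * per target VMID.
 */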
2337 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2338 {
2339 	int i;
2340 	uint32_t sh_mem_config;
2341 	uint32_t sh_mem_bases;
2342 
2343 	/*
2344 	 * Configure apertures:
2345 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2346 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2347 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2348 	 */
2349 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2350 
2351 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2352 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2353 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2354 
2355 	mutex_lock(&adev->srbm_mutex);
2356 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2357 		soc15_grbm_select(adev, 0, 0, 0, i);
2358 		/* CP and shaders */
2359 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2360 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2361 	}
2362 	soc15_grbm_select(adev, 0, 0, 0, 0);
2363 	mutex_unlock(&adev->srbm_mutex);
2364 
2365 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2366 	   access. These should be enabled by FW for target VMIDs. */
2367 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2368 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2369 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2370 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2371 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2372 	}
2373 }
2374 
2375 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2376 {
2377 	int vmid;
2378 
2379 	/*
2380 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2381 	 * access. Compute VMIDs should be enabled by FW for target VMIDs;
2382 	 * the driver can enable them for graphics. VMID0 should maintain
2383 	 * access so that HWS firmware can save/restore entries.
2384 	 */
2385 	for (vmid = 1; vmid < 16; vmid++) {
2386 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2387 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2388 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2389 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2390 	}
2391 }
2392 
2393 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2394 {
2395 	u32 tmp;
2396 	int i;
2397 
2398 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2399 
2400 	gfx_v9_0_tiling_mode_table_init(adev);
2401 
2402 	gfx_v9_0_setup_rb(adev);
2403 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2404 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2405 
2406 	/* XXX SH_MEM regs */
2407 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2408 	mutex_lock(&adev->srbm_mutex);
2409 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2410 		soc15_grbm_select(adev, 0, 0, 0, i);
2411 		/* CP and shaders */
2412 		if (i == 0) {
2413 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2414 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2415 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2416 					    !!amdgpu_noretry);
2417 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2418 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2419 		} else {
2420 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2421 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2422 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2423 					    !!amdgpu_noretry);
2424 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2425 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2426 				(adev->gmc.private_aperture_start >> 48));
2427 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2428 				(adev->gmc.shared_aperture_start >> 48));
2429 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2430 		}
2431 	}
2432 	soc15_grbm_select(adev, 0, 0, 0, 0);
2433 
2434 	mutex_unlock(&adev->srbm_mutex);
2435 
2436 	gfx_v9_0_init_compute_vmid(adev);
2437 	gfx_v9_0_init_gds_vmid(adev);
2438 }
2439 
2440 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2441 {
2442 	u32 i, j, k;
2443 	u32 mask;
2444 
2445 	mutex_lock(&adev->grbm_idx_mutex);
2446 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2447 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2448 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2449 			for (k = 0; k < adev->usec_timeout; k++) {
2450 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2451 					break;
2452 				udelay(1);
2453 			}
2454 			if (k == adev->usec_timeout) {
2455 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2456 						      0xffffffff, 0xffffffff);
2457 				mutex_unlock(&adev->grbm_idx_mutex);
2458 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2459 					 i, j);
2460 				return;
2461 			}
2462 		}
2463 	}
2464 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2465 	mutex_unlock(&adev->grbm_idx_mutex);
2466 
2467 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2468 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2469 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2470 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2471 	for (k = 0; k < adev->usec_timeout; k++) {
2472 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2473 			break;
2474 		udelay(1);
2475 	}
2476 }
2477 
2478 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2479 					       bool enable)
2480 {
2481 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2482 
2483 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2484 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2485 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2486 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2487 
2488 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2489 }
2490 
2491 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2492 {
2493 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2494 	/* csib */
2495 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2496 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2497 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2498 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2499 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2500 			adev->gfx.rlc.clear_state_size);
2501 }
2502 
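/*
 * Walk the indirect part of the RLC register-list-format array.  Each
 * indirect block starts at an offset recorded in indirect_start_offsets and
 * is terminated by 0xFFFFFFFF; every indirect register encountered is also
 * collected into the unique_indirect_regs table.
 */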
2503 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2504 				int indirect_offset,
2505 				int list_size,
2506 				int *unique_indirect_regs,
2507 				int unique_indirect_reg_count,
2508 				int *indirect_start_offsets,
2509 				int *indirect_start_offsets_count,
2510 				int max_start_offsets_count)
2511 {
2512 	int idx;
2513 
2514 	for (; indirect_offset < list_size; indirect_offset++) {
2515 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2516 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2517 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2518 
2519 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2520 			indirect_offset += 2;
2521 
2522 			/* look for the matching index */
2523 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2524 				if (unique_indirect_regs[idx] ==
2525 					register_list_format[indirect_offset] ||
2526 					!unique_indirect_regs[idx])
2527 					break;
2528 			}
2529 
2530 			BUG_ON(idx >= unique_indirect_reg_count);
2531 
2532 			if (!unique_indirect_regs[idx])
2533 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2534 
2535 			indirect_offset++;
2536 		}
2537 	}
2538 }
2539 
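/*
 * Program the RLC save/restore machine: write the register-restore table
 * into the save/restore ARAM, load the direct and indirect portions of the
 * register-list format into RLC scratch RAM, then set up the index/data
 * registers for the unique indirect registers.
 */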
2540 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2541 {
2542 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2543 	int unique_indirect_reg_count = 0;
2544 
2545 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2546 	int indirect_start_offsets_count = 0;
2547 
2548 	int list_size = 0;
2549 	int i = 0, j = 0;
2550 	u32 tmp = 0;
2551 
2552 	u32 *register_list_format =
2553 		kmemdup(adev->gfx.rlc.register_list_format,
2554 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2555 	if (!register_list_format)
2556 		return -ENOMEM;
2557 
2558 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2559 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2560 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2561 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2562 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2563 				    unique_indirect_regs,
2564 				    unique_indirect_reg_count,
2565 				    indirect_start_offsets,
2566 				    &indirect_start_offsets_count,
2567 				    ARRAY_SIZE(indirect_start_offsets));
2568 
2569 	/* enable auto inc in case it is disabled */
2570 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2571 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2572 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2573 
2574 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2575 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2576 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2577 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2578 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2579 			adev->gfx.rlc.register_restore[i]);
2580 
2581 	/* load indirect register */
2582 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2583 		adev->gfx.rlc.reg_list_format_start);
2584 
2585 	/* direct register portion */
2586 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2587 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2588 			register_list_format[i]);
2589 
2590 	/* indirect register portion */
2591 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2592 		if (register_list_format[i] == 0xFFFFFFFF) {
2593 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594 			continue;
2595 		}
2596 
2597 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2598 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2599 
2600 		for (j = 0; j < unique_indirect_reg_count; j++) {
2601 			if (register_list_format[i] == unique_indirect_regs[j]) {
2602 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2603 				break;
2604 			}
2605 		}
2606 
2607 		BUG_ON(j >= unique_indirect_reg_count);
2608 
2609 		i++;
2610 	}
2611 
2612 	/* set save/restore list size */
2613 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2614 	list_size = list_size >> 1;
2615 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2616 		adev->gfx.rlc.reg_restore_list_size);
2617 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2618 
2619 	/* write the starting offsets to RLC scratch ram */
2620 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2621 		adev->gfx.rlc.starting_offsets_start);
2622 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2623 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2624 		       indirect_start_offsets[i]);
2625 
2626 	/* load unique indirect regs */
2627 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2628 		if (unique_indirect_regs[i] != 0) {
2629 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2630 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2631 			       unique_indirect_regs[i] & 0x3FFFF);
2632 
2633 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2634 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2635 			       unique_indirect_regs[i] >> 20);
2636 		}
2637 	}
2638 
2639 	kfree(register_list_format);
2640 	return 0;
2641 }
2642 
2643 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2644 {
2645 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2646 }
2647 
2648 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2649 					     bool enable)
2650 {
2651 	uint32_t data = 0;
2652 	uint32_t default_data = 0;
2653 
2654 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2655 	if (enable) {
2656 		/* enable GFXIP control over CGPG */
2657 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2658 		if (default_data != data)
2659 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2660 
2661 		/* update status */
2662 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2663 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2664 		if (default_data != data)
2665 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2666 	} else {
2667 		/* restore GFXIP control over CGPG */
2668 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2669 		if (default_data != data)
2670 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2671 	}
2672 }
2673 
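/*
 * Static GFX power-gating setup: idle poll count, the RLC power-gating
 * delays, the GRBM register-save idle threshold, and finally handing CGPG
 * control to the GFXIP block via PWR_MISC_CNTL_STATUS.
 */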
2674 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2675 {
2676 	uint32_t data = 0;
2677 
2678 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2679 			      AMD_PG_SUPPORT_GFX_SMG |
2680 			      AMD_PG_SUPPORT_GFX_DMG)) {
2681 		/* init IDLE_POLL_COUNT = 60 */
2682 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2683 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2684 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2685 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2686 
2687 		/* init RLC PG Delay */
2688 		data = 0;
2689 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2690 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2691 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2692 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2693 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2694 
2695 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2696 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2697 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2698 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2699 
2700 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2701 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2702 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2703 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2704 
2705 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2706 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2707 
2708 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2709 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2710 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2711 
2712 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2713 	}
2714 }
2715 
2716 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2717 						bool enable)
2718 {
2719 	uint32_t data = 0;
2720 	uint32_t default_data = 0;
2721 
2722 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2723 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2724 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2725 			     enable ? 1 : 0);
2726 	if (default_data != data)
2727 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2728 }
2729 
2730 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2731 						bool enable)
2732 {
2733 	uint32_t data = 0;
2734 	uint32_t default_data = 0;
2735 
2736 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2737 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2738 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2739 			     enable ? 1 : 0);
2740 	if (default_data != data)
2741 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2742 }
2743 
2744 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2745 					bool enable)
2746 {
2747 	uint32_t data = 0;
2748 	uint32_t default_data = 0;
2749 
2750 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2751 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2752 			     CP_PG_DISABLE,
2753 			     enable ? 0 : 1);
2754 	if (default_data != data)
2755 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2756 }
2757 
2758 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2759 						bool enable)
2760 {
2761 	uint32_t data, default_data;
2762 
2763 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2764 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2765 			     GFX_POWER_GATING_ENABLE,
2766 			     enable ? 1 : 0);
2767 	if (default_data != data)
2768 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2769 }
2770 
2771 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2772 						bool enable)
2773 {
2774 	uint32_t data, default_data;
2775 
2776 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2777 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2778 			     GFX_PIPELINE_PG_ENABLE,
2779 			     enable ? 1 : 0);
2780 	if (default_data != data)
2781 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2782 
2783 	if (!enable)
2784 		/* read any GFX register to wake up GFX */
2785 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2786 }
2787 
2788 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2789 						       bool enable)
2790 {
2791 	uint32_t data, default_data;
2792 
2793 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795 			     STATIC_PER_CU_PG_ENABLE,
2796 			     enable ? 1 : 0);
2797 	if (default_data != data)
2798 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800 
2801 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2802 						bool enable)
2803 {
2804 	uint32_t data, default_data;
2805 
2806 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808 			     DYN_PER_CU_PG_ENABLE,
2809 			     enable ? 1 : 0);
2810 	if (default_data != data)
2811 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813 
2814 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2815 {
2816 	gfx_v9_0_init_csb(adev);
2817 
2818 	/*
2819 	 * The RLC save/restore list is only usable since RLC v2_1,
2820 	 * and it is required by the gfxoff feature.
2821 	 */
2822 	if (adev->gfx.rlc.is_rlc_v2_1) {
2823 		if (adev->asic_type == CHIP_VEGA12 ||
2824 		    (adev->asic_type == CHIP_RAVEN &&
2825 		     adev->rev_id >= 8))
2826 			gfx_v9_1_init_rlc_save_restore_list(adev);
2827 		gfx_v9_0_enable_save_restore_machine(adev);
2828 	}
2829 
2830 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831 			      AMD_PG_SUPPORT_GFX_SMG |
2832 			      AMD_PG_SUPPORT_GFX_DMG |
2833 			      AMD_PG_SUPPORT_CP |
2834 			      AMD_PG_SUPPORT_GDS |
2835 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2836 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2837 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2838 		gfx_v9_0_init_gfx_power_gating(adev);
2839 	}
2840 }
2841 
2842 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2843 {
2844 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2845 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2846 	gfx_v9_0_wait_for_rlc_serdes(adev);
2847 }
2848 
2849 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2850 {
2851 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2852 	udelay(50);
2853 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2854 	udelay(50);
2855 }
2856 
2857 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2858 {
2859 #ifdef AMDGPU_RLC_DEBUG_RETRY
2860 	u32 rlc_ucode_ver;
2861 #endif
2862 
2863 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2864 	udelay(50);
2865 
2866 	/* APUs (e.g. carrizo) enable the cp interrupt only after the cp is initialized */
2867 	if (!(adev->flags & AMD_IS_APU)) {
2868 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2869 		udelay(50);
2870 	}
2871 
2872 #ifdef AMDGPU_RLC_DEBUG_RETRY
2873 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2874 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2875 	if (rlc_ucode_ver == 0x108) {
2876 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2877 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2878 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2879 		 * default is 0x9C4 to create a 100us interval */
2880 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2881 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2882 		 * to disable the page fault retry interrupts, default is
2883 		 * 0x100 (256) */
2884 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2885 	}
2886 #endif
2887 }
2888 
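/*
 * Legacy (non-PSP) RLC microcode load: stream the RLCG ucode one dword at a
 * time through RLC_GPM_UCODE_ADDR/DATA starting at
 * RLCG_UCODE_LOADING_START_ADDRESS, then leave the firmware version in the
 * ADDR register.
 */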
2889 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2890 {
2891 	const struct rlc_firmware_header_v2_0 *hdr;
2892 	const __le32 *fw_data;
2893 	unsigned i, fw_size;
2894 
2895 	if (!adev->gfx.rlc_fw)
2896 		return -EINVAL;
2897 
2898 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2899 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2900 
2901 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2902 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2903 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2904 
2905 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2906 			RLCG_UCODE_LOADING_START_ADDRESS);
2907 	for (i = 0; i < fw_size; i++)
2908 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2909 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2910 
2911 	return 0;
2912 }
2913 
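/*
 * Bring the RLC back up: stop it, clear RLC_CGCG_CGLS_CTRL (disable CG),
 * re-initialize power gating, load the RLC ucode when PSP front-door
 * loading is not used, apply the LBPW module option for Raven/Vega20 and
 * finally start the RLC again.
 */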
2914 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2915 {
2916 	int r;
2917 
2918 	if (amdgpu_sriov_vf(adev)) {
2919 		gfx_v9_0_init_csb(adev);
2920 		return 0;
2921 	}
2922 
2923 	adev->gfx.rlc.funcs->stop(adev);
2924 
2925 	/* disable CG */
2926 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2927 
2928 	gfx_v9_0_init_pg(adev);
2929 
2930 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2931 		/* legacy rlc firmware loading */
2932 		r = gfx_v9_0_rlc_load_microcode(adev);
2933 		if (r)
2934 			return r;
2935 	}
2936 
2937 	switch (adev->asic_type) {
2938 	case CHIP_RAVEN:
2939 		if (amdgpu_lbpw == 0)
2940 			gfx_v9_0_enable_lbpw(adev, false);
2941 		else
2942 			gfx_v9_0_enable_lbpw(adev, true);
2943 		break;
2944 	case CHIP_VEGA20:
2945 		if (amdgpu_lbpw > 0)
2946 			gfx_v9_0_enable_lbpw(adev, true);
2947 		else
2948 			gfx_v9_0_enable_lbpw(adev, false);
2949 		break;
2950 	default:
2951 		break;
2952 	}
2953 
2954 	adev->gfx.rlc.funcs->start(adev);
2955 
2956 	return 0;
2957 }
2958 
2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2960 {
2961 	int i;
2962 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2963 
2964 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2965 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2966 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2967 	if (!enable) {
2968 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2969 			adev->gfx.gfx_ring[i].sched.ready = false;
2970 	}
2971 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2972 	udelay(50);
2973 }
2974 
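/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP, then stream the
 * PFP, CE and ME images through their UCODE/RAM ADDR+DATA register pairs
 * and record the firmware versions in the ADDR registers.
 */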
2975 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2976 {
2977 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2978 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2979 	const struct gfx_firmware_header_v1_0 *me_hdr;
2980 	const __le32 *fw_data;
2981 	unsigned i, fw_size;
2982 
2983 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2984 		return -EINVAL;
2985 
2986 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2987 		adev->gfx.pfp_fw->data;
2988 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2989 		adev->gfx.ce_fw->data;
2990 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2991 		adev->gfx.me_fw->data;
2992 
2993 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2994 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2995 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2996 
2997 	gfx_v9_0_cp_gfx_enable(adev, false);
2998 
2999 	/* PFP */
3000 	fw_data = (const __le32 *)
3001 		(adev->gfx.pfp_fw->data +
3002 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3003 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3004 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3005 	for (i = 0; i < fw_size; i++)
3006 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3007 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3008 
3009 	/* CE */
3010 	fw_data = (const __le32 *)
3011 		(adev->gfx.ce_fw->data +
3012 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3013 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3014 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3015 	for (i = 0; i < fw_size; i++)
3016 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3017 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3018 
3019 	/* ME */
3020 	fw_data = (const __le32 *)
3021 		(adev->gfx.me_fw->data +
3022 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3023 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3024 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3025 	for (i = 0; i < fw_size; i++)
3026 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3027 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3028 
3029 	return 0;
3030 }
3031 
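/*
 * Initialize the CP and emit the clear state sequence on the gfx ring:
 * PREAMBLE begin, CONTEXT_CONTROL, the SET_CONTEXT_REG extents from
 * gfx9_cs_data, PREAMBLE end, CLEAR_STATE and the CE partition bases.
 */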
3032 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3033 {
3034 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3035 	const struct cs_section_def *sect = NULL;
3036 	const struct cs_extent_def *ext = NULL;
3037 	int r, i, tmp;
3038 
3039 	/* init the CP */
3040 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3041 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3042 
3043 	gfx_v9_0_cp_gfx_enable(adev, true);
3044 
3045 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3046 	if (r) {
3047 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3048 		return r;
3049 	}
3050 
3051 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3053 
3054 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055 	amdgpu_ring_write(ring, 0x80000000);
3056 	amdgpu_ring_write(ring, 0x80000000);
3057 
3058 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3059 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3060 			if (sect->id == SECT_CONTEXT) {
3061 				amdgpu_ring_write(ring,
3062 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3063 					       ext->reg_count));
3064 				amdgpu_ring_write(ring,
3065 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066 				for (i = 0; i < ext->reg_count; i++)
3067 					amdgpu_ring_write(ring, ext->extent[i]);
3068 			}
3069 		}
3070 	}
3071 
3072 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3074 
3075 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3076 	amdgpu_ring_write(ring, 0);
3077 
3078 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3079 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3080 	amdgpu_ring_write(ring, 0x8000);
3081 	amdgpu_ring_write(ring, 0x8000);
3082 
3083 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3084 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3085 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3086 	amdgpu_ring_write(ring, tmp);
3087 	amdgpu_ring_write(ring, 0);
3088 
3089 	amdgpu_ring_commit(ring);
3090 
3091 	return 0;
3092 }
3093 
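/*
 * Program gfx ring buffer 0: buffer/block size, rptr and wptr write-back
 * addresses, ring base and the doorbell range, then start the ring via
 * gfx_v9_0_cp_gfx_start() and mark it ready for the scheduler.
 */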
3094 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3095 {
3096 	struct amdgpu_ring *ring;
3097 	u32 tmp;
3098 	u32 rb_bufsz;
3099 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3100 
3101 	/* Set the write pointer delay */
3102 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3103 
3104 	/* set the RB to use vmid 0 */
3105 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3106 
3107 	/* Set ring buffer size */
3108 	ring = &adev->gfx.gfx_ring[0];
3109 	rb_bufsz = order_base_2(ring->ring_size / 8);
3110 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3111 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3112 #ifdef __BIG_ENDIAN
3113 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3114 #endif
3115 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3116 
3117 	/* Initialize the ring buffer's write pointers */
3118 	ring->wptr = 0;
3119 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3120 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3121 
3122 	/* set the wb address whether it's enabled or not */
3123 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3124 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3125 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3126 
3127 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3128 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3129 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3130 
3131 	mdelay(1);
3132 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3133 
3134 	rb_addr = ring->gpu_addr >> 8;
3135 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3136 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3137 
3138 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3139 	if (ring->use_doorbell) {
3140 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3141 				    DOORBELL_OFFSET, ring->doorbell_index);
3142 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143 				    DOORBELL_EN, 1);
3144 	} else {
3145 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3146 	}
3147 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3148 
3149 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3150 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3151 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3152 
3153 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3154 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3155 
3156 
3157 	/* start the ring */
3158 	gfx_v9_0_cp_gfx_start(adev);
3159 	ring->sched.ready = true;
3160 
3161 	return 0;
3162 }
3163 
3164 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3165 {
3166 	int i;
3167 
3168 	if (enable) {
3169 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3170 	} else {
3171 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3172 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3173 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3174 			adev->gfx.compute_ring[i].sched.ready = false;
3175 		adev->gfx.kiq.ring.sched.ready = false;
3176 	}
3177 	udelay(50);
3178 }
3179 
3180 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3181 {
3182 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3183 	const __le32 *fw_data;
3184 	unsigned i;
3185 	u32 tmp;
3186 
3187 	if (!adev->gfx.mec_fw)
3188 		return -EINVAL;
3189 
3190 	gfx_v9_0_cp_compute_enable(adev, false);
3191 
3192 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3193 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3194 
3195 	fw_data = (const __le32 *)
3196 		(adev->gfx.mec_fw->data +
3197 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3198 	tmp = 0;
3199 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3200 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3201 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3202 
3203 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3204 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3205 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3206 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3207 
3208 	/* MEC1 */
3209 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3210 			 mec_hdr->jt_offset);
3211 	for (i = 0; i < mec_hdr->jt_size; i++)
3212 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3213 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3214 
3215 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3216 			adev->gfx.mec_fw_version);
3217 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3218 
3219 	return 0;
3220 }
3221 
3222 /* KIQ functions */
3223 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3224 {
3225 	uint32_t tmp;
3226 	struct amdgpu_device *adev = ring->adev;
3227 
3228 	/* tell the RLC which queue is the KIQ queue */
3229 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3230 	tmp &= 0xffffff00;
3231 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3232 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3233 	tmp |= 0x80;
3234 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3235 }
3236 
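/*
 * Fill the memory queue descriptor (MQD) for this ring: EOP buffer,
 * doorbell, MQD/HQD base addresses and PQ control, mirroring what the
 * CP_RB0_* programming does for the gfx ring.  Only the KIQ is marked
 * active here; compute queues are activated through the map_queues packet.
 */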
3237 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3238 {
3239 	struct amdgpu_device *adev = ring->adev;
3240 	struct v9_mqd *mqd = ring->mqd_ptr;
3241 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3242 	uint32_t tmp;
3243 
3244 	mqd->header = 0xC0310800;
3245 	mqd->compute_pipelinestat_enable = 0x00000001;
3246 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3247 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3248 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3249 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3250 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3251 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3252 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3253 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3254 	mqd->compute_misc_reserved = 0x00000003;
3255 
3256 	mqd->dynamic_cu_mask_addr_lo =
3257 		lower_32_bits(ring->mqd_gpu_addr
3258 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3259 	mqd->dynamic_cu_mask_addr_hi =
3260 		upper_32_bits(ring->mqd_gpu_addr
3261 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3262 
3263 	eop_base_addr = ring->eop_gpu_addr >> 8;
3264 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3265 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3266 
3267 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3268 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3269 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3270 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3271 
3272 	mqd->cp_hqd_eop_control = tmp;
3273 
3274 	/* enable doorbell? */
3275 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3276 
3277 	if (ring->use_doorbell) {
3278 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3279 				    DOORBELL_OFFSET, ring->doorbell_index);
3280 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3281 				    DOORBELL_EN, 1);
3282 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3283 				    DOORBELL_SOURCE, 0);
3284 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3285 				    DOORBELL_HIT, 0);
3286 	} else {
3287 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3288 					 DOORBELL_EN, 0);
3289 	}
3290 
3291 	mqd->cp_hqd_pq_doorbell_control = tmp;
3292 
3293 	/* disable the queue if it's active */
3294 	ring->wptr = 0;
3295 	mqd->cp_hqd_dequeue_request = 0;
3296 	mqd->cp_hqd_pq_rptr = 0;
3297 	mqd->cp_hqd_pq_wptr_lo = 0;
3298 	mqd->cp_hqd_pq_wptr_hi = 0;
3299 
3300 	/* set the pointer to the MQD */
3301 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3302 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3303 
3304 	/* set MQD vmid to 0 */
3305 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3306 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3307 	mqd->cp_mqd_control = tmp;
3308 
3309 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3310 	hqd_gpu_addr = ring->gpu_addr >> 8;
3311 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3312 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3313 
3314 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3315 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3316 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3317 			    (order_base_2(ring->ring_size / 4) - 1));
3318 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3319 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3320 #ifdef __BIG_ENDIAN
3321 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3322 #endif
3323 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3324 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3325 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3326 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3327 	mqd->cp_hqd_pq_control = tmp;
3328 
3329 	/* set the wb address whether it's enabled or not */
3330 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3331 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3332 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3333 		upper_32_bits(wb_gpu_addr) & 0xffff;
3334 
3335 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3336 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3337 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3338 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3339 
3340 	tmp = 0;
3341 	/* enable the doorbell if requested */
3342 	if (ring->use_doorbell) {
3343 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3344 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3345 				DOORBELL_OFFSET, ring->doorbell_index);
3346 
3347 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3348 					 DOORBELL_EN, 1);
3349 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3350 					 DOORBELL_SOURCE, 0);
3351 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3352 					 DOORBELL_HIT, 0);
3353 	}
3354 
3355 	mqd->cp_hqd_pq_doorbell_control = tmp;
3356 
3357 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3358 	ring->wptr = 0;
3359 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3360 
3361 	/* set the vmid for the queue */
3362 	mqd->cp_hqd_vmid = 0;
3363 
3364 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3365 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3366 	mqd->cp_hqd_persistent_state = tmp;
3367 
3368 	/* set MIN_IB_AVAIL_SIZE */
3369 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3370 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3371 	mqd->cp_hqd_ib_control = tmp;
3372 
3373 	/* the map_queues packet doesn't need to activate the queue,
3374 	 * so only the KIQ needs to set this field.
3375 	 */
3376 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3377 		mqd->cp_hqd_active = 1;
3378 
3379 	return 0;
3380 }
3381 
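/*
 * Commit the MQD contents to the HQD registers.  Callers hold srbm_mutex
 * and have the target me/pipe/queue selected via soc15_grbm_select().
 */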
3382 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3383 {
3384 	struct amdgpu_device *adev = ring->adev;
3385 	struct v9_mqd *mqd = ring->mqd_ptr;
3386 	int j;
3387 
3388 	/* disable wptr polling */
3389 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3390 
3391 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3392 	       mqd->cp_hqd_eop_base_addr_lo);
3393 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3394 	       mqd->cp_hqd_eop_base_addr_hi);
3395 
3396 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3397 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3398 	       mqd->cp_hqd_eop_control);
3399 
3400 	/* enable doorbell? */
3401 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3402 	       mqd->cp_hqd_pq_doorbell_control);
3403 
3404 	/* disable the queue if it's active */
3405 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3406 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3407 		for (j = 0; j < adev->usec_timeout; j++) {
3408 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3409 				break;
3410 			udelay(1);
3411 		}
3412 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3413 		       mqd->cp_hqd_dequeue_request);
3414 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3415 		       mqd->cp_hqd_pq_rptr);
3416 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3417 		       mqd->cp_hqd_pq_wptr_lo);
3418 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3419 		       mqd->cp_hqd_pq_wptr_hi);
3420 	}
3421 
3422 	/* set the pointer to the MQD */
3423 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3424 	       mqd->cp_mqd_base_addr_lo);
3425 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3426 	       mqd->cp_mqd_base_addr_hi);
3427 
3428 	/* set MQD vmid to 0 */
3429 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3430 	       mqd->cp_mqd_control);
3431 
3432 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3433 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3434 	       mqd->cp_hqd_pq_base_lo);
3435 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3436 	       mqd->cp_hqd_pq_base_hi);
3437 
3438 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3439 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3440 	       mqd->cp_hqd_pq_control);
3441 
3442 	/* set the wb address whether it's enabled or not */
3443 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3444 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3445 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3446 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3447 
3448 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3449 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3450 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3451 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3452 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3453 
3454 	/* enable the doorbell if requested */
3455 	if (ring->use_doorbell) {
3456 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3457 					(adev->doorbell_index.kiq * 2) << 2);
3458 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3459 					(adev->doorbell_index.userqueue_end * 2) << 2);
3460 	}
3461 
3462 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3463 	       mqd->cp_hqd_pq_doorbell_control);
3464 
3465 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3467 	       mqd->cp_hqd_pq_wptr_lo);
3468 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3469 	       mqd->cp_hqd_pq_wptr_hi);
3470 
3471 	/* set the vmid for the queue */
3472 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3473 
3474 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3475 	       mqd->cp_hqd_persistent_state);
3476 
3477 	/* activate the queue */
3478 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3479 	       mqd->cp_hqd_active);
3480 
3481 	if (ring->use_doorbell)
3482 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3483 
3484 	return 0;
3485 }
3486 
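/*
 * Tear down the KIQ HQD: request a dequeue, wait for the queue to go
 * inactive (forcing HQD_ACTIVE off if the request times out) and then
 * clear the HQD registers.
 */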
3487 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3488 {
3489 	struct amdgpu_device *adev = ring->adev;
3490 	int j;
3491 
3492 	/* disable the queue if it's active */
3493 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3494 
3495 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3496 
3497 		for (j = 0; j < adev->usec_timeout; j++) {
3498 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3499 				break;
3500 			udelay(1);
3501 		}
3502 
3503 		if (j == adev->usec_timeout) {
3504 			DRM_DEBUG("KIQ dequeue request failed.\n");
3505 
3506 			/* Manual disable if dequeue request times out */
3507 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3508 		}
3509 
3510 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3511 		      0);
3512 	}
3513 
3514 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3515 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3516 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3517 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3518 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3519 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3520 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3521 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3522 
3523 	return 0;
3524 }
3525 
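/*
 * KIQ queue bring-up.  On GPU reset the MQD is restored from the backup
 * copy and only the HQD registers are re-programmed; on first init the MQD
 * is built from scratch and a backup is kept for later resets.
 */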
3526 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3527 {
3528 	struct amdgpu_device *adev = ring->adev;
3529 	struct v9_mqd *mqd = ring->mqd_ptr;
3530 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3531 
3532 	gfx_v9_0_kiq_setting(ring);
3533 
3534 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3535 		/* reset MQD to a clean status */
3536 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3537 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3538 
3539 		/* reset ring buffer */
3540 		ring->wptr = 0;
3541 		amdgpu_ring_clear_ring(ring);
3542 
3543 		mutex_lock(&adev->srbm_mutex);
3544 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3545 		gfx_v9_0_kiq_init_register(ring);
3546 		soc15_grbm_select(adev, 0, 0, 0, 0);
3547 		mutex_unlock(&adev->srbm_mutex);
3548 	} else {
3549 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3550 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3551 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3552 		mutex_lock(&adev->srbm_mutex);
3553 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3554 		gfx_v9_0_mqd_init(ring);
3555 		gfx_v9_0_kiq_init_register(ring);
3556 		soc15_grbm_select(adev, 0, 0, 0, 0);
3557 		mutex_unlock(&adev->srbm_mutex);
3558 
3559 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3560 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3561 	}
3562 
3563 	return 0;
3564 }
3565 
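/*
 * Compute (KCQ) queue bring-up.  First init builds the MQD and backs it
 * up, GPU reset restores the MQD from the backup and clears the ring, and
 * a plain resume only clears the ring; the queues themselves are mapped
 * later through the KIQ in gfx_v9_0_kcq_resume().
 */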
3566 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3567 {
3568 	struct amdgpu_device *adev = ring->adev;
3569 	struct v9_mqd *mqd = ring->mqd_ptr;
3570 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3571 
3572 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3573 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3574 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3575 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3576 		mutex_lock(&adev->srbm_mutex);
3577 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3578 		gfx_v9_0_mqd_init(ring);
3579 		soc15_grbm_select(adev, 0, 0, 0, 0);
3580 		mutex_unlock(&adev->srbm_mutex);
3581 
3582 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3583 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3584 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3585 		/* reset MQD to a clean status */
3586 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3587 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3588 
3589 		/* reset ring buffer */
3590 		ring->wptr = 0;
3591 		amdgpu_ring_clear_ring(ring);
3592 	} else {
3593 		amdgpu_ring_clear_ring(ring);
3594 	}
3595 
3596 	return 0;
3597 }
3598 
3599 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3600 {
3601 	struct amdgpu_ring *ring;
3602 	int r;
3603 
3604 	ring = &adev->gfx.kiq.ring;
3605 
3606 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3607 	if (unlikely(r != 0))
3608 		return r;
3609 
3610 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3611 	if (unlikely(r != 0))
3612 		return r;
3613 
3614 	gfx_v9_0_kiq_init_queue(ring);
3615 	amdgpu_bo_kunmap(ring->mqd_obj);
3616 	ring->mqd_ptr = NULL;
3617 	amdgpu_bo_unreserve(ring->mqd_obj);
3618 	ring->sched.ready = true;
3619 	return 0;
3620 }
3621 
3622 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3623 {
3624 	struct amdgpu_ring *ring = NULL;
3625 	int r = 0, i;
3626 
3627 	gfx_v9_0_cp_compute_enable(adev, true);
3628 
3629 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3630 		ring = &adev->gfx.compute_ring[i];
3631 
3632 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3633 		if (unlikely(r != 0))
3634 			goto done;
3635 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3636 		if (!r) {
3637 			r = gfx_v9_0_kcq_init_queue(ring);
3638 			amdgpu_bo_kunmap(ring->mqd_obj);
3639 			ring->mqd_ptr = NULL;
3640 		}
3641 		amdgpu_bo_unreserve(ring->mqd_obj);
3642 		if (r)
3643 			goto done;
3644 	}
3645 
3646 	r = amdgpu_gfx_enable_kcq(adev);
3647 done:
3648 	return r;
3649 }
3650 
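/*
 * Full CP bring-up: load the CP microcode when PSP loading is not used,
 * resume the KIQ first, then the gfx ring (skipped on Arcturus, which has
 * no gfx rings) and the compute queues, and finally ring-test everything.
 */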
3651 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3652 {
3653 	int r, i;
3654 	struct amdgpu_ring *ring;
3655 
3656 	if (!(adev->flags & AMD_IS_APU))
3657 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3658 
3659 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3660 		if (adev->asic_type != CHIP_ARCTURUS) {
3661 			/* legacy firmware loading */
3662 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3663 			if (r)
3664 				return r;
3665 		}
3666 
3667 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3668 		if (r)
3669 			return r;
3670 	}
3671 
3672 	r = gfx_v9_0_kiq_resume(adev);
3673 	if (r)
3674 		return r;
3675 
3676 	if (adev->asic_type != CHIP_ARCTURUS) {
3677 		r = gfx_v9_0_cp_gfx_resume(adev);
3678 		if (r)
3679 			return r;
3680 	}
3681 
3682 	r = gfx_v9_0_kcq_resume(adev);
3683 	if (r)
3684 		return r;
3685 
3686 	if (adev->asic_type != CHIP_ARCTURUS) {
3687 		ring = &adev->gfx.gfx_ring[0];
3688 		r = amdgpu_ring_test_helper(ring);
3689 		if (r)
3690 			return r;
3691 	}
3692 
3693 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3694 		ring = &adev->gfx.compute_ring[i];
3695 		amdgpu_ring_test_helper(ring);
3696 	}
3697 
3698 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3699 
3700 	return 0;
3701 }
3702 
3703 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3704 {
3705 	u32 tmp;
3706 
3707 	if (adev->asic_type != CHIP_ARCTURUS)
3708 		return;
3709 
3710 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3711 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3712 				adev->df.hash_status.hash_64k);
3713 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3714 				adev->df.hash_status.hash_2m);
3715 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3716 				adev->df.hash_status.hash_1g);
3717 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3718 }
3719 
3720 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3721 {
3722 	if (adev->asic_type != CHIP_ARCTURUS)
3723 		gfx_v9_0_cp_gfx_enable(adev, enable);
3724 	gfx_v9_0_cp_compute_enable(adev, enable);
3725 }
3726 
3727 static int gfx_v9_0_hw_init(void *handle)
3728 {
3729 	int r;
3730 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3731 
3732 	if (!amdgpu_sriov_vf(adev))
3733 		gfx_v9_0_init_golden_registers(adev);
3734 
3735 	gfx_v9_0_constants_init(adev);
3736 
3737 	gfx_v9_0_init_tcp_config(adev);
3738 
3739 	r = adev->gfx.rlc.funcs->resume(adev);
3740 	if (r)
3741 		return r;
3742 
3743 	r = gfx_v9_0_cp_resume(adev);
3744 	if (r)
3745 		return r;
3746 
3747 	return r;
3748 }
3749 
3750 static int gfx_v9_0_hw_fini(void *handle)
3751 {
3752 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3753 
3754 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3755 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3756 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3757 
3758 	/* DF freeze and kcq disable would fail after a RAS fatal error interrupt */
3759 	if (!amdgpu_ras_intr_triggered())
3760 		/* disable KCQ to avoid the CPC touching memory that is no longer valid */
3761 		amdgpu_gfx_disable_kcq(adev);
3762 
3763 	if (amdgpu_sriov_vf(adev)) {
3764 		gfx_v9_0_cp_gfx_enable(adev, false);
3765 		/* wptr polling must be disabled for SRIOV once the hw is finished,
3766 		 * otherwise the CPC engine may keep fetching the WB address, which
3767 		 * is no longer valid after the sw side has finished, and trigger a
3768 		 * DMAR read error on the hypervisor side.
3769 		 */
3770 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3771 		return 0;
3772 	}
3773 
3774 	/* Use the deinitialize sequence from CAIL when unbinding the device
3775 	 * from the driver, otherwise the KIQ hangs when binding it back.
3776 	 */
3777 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3778 		mutex_lock(&adev->srbm_mutex);
3779 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3780 				adev->gfx.kiq.ring.pipe,
3781 				adev->gfx.kiq.ring.queue, 0);
3782 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3783 		soc15_grbm_select(adev, 0, 0, 0, 0);
3784 		mutex_unlock(&adev->srbm_mutex);
3785 	}
3786 
3787 	gfx_v9_0_cp_enable(adev, false);
3788 	adev->gfx.rlc.funcs->stop(adev);
3789 
3790 	return 0;
3791 }
3792 
3793 static int gfx_v9_0_suspend(void *handle)
3794 {
3795 	return gfx_v9_0_hw_fini(handle);
3796 }
3797 
3798 static int gfx_v9_0_resume(void *handle)
3799 {
3800 	return gfx_v9_0_hw_init(handle);
3801 }
3802 
3803 static bool gfx_v9_0_is_idle(void *handle)
3804 {
3805 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3806 
3807 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3808 				GRBM_STATUS, GUI_ACTIVE))
3809 		return false;
3810 	else
3811 		return true;
3812 }
3813 
3814 static int gfx_v9_0_wait_for_idle(void *handle)
3815 {
3816 	unsigned i;
3817 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3818 
3819 	for (i = 0; i < adev->usec_timeout; i++) {
3820 		if (gfx_v9_0_is_idle(handle))
3821 			return 0;
3822 		udelay(1);
3823 	}
3824 	return -ETIMEDOUT;
3825 }
3826 
3827 static int gfx_v9_0_soft_reset(void *handle)
3828 {
3829 	u32 grbm_soft_reset = 0;
3830 	u32 tmp;
3831 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832 
3833 	/* GRBM_STATUS */
3834 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3835 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3836 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3837 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3838 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3839 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3840 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3841 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3844 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3845 	}
3846 
3847 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3848 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3850 	}
3851 
3852 	/* GRBM_STATUS2 */
3853 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3854 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3855 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3856 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3857 
3858 
3859 	if (grbm_soft_reset) {
3860 		/* stop the rlc */
3861 		adev->gfx.rlc.funcs->stop(adev);
3862 
3863 		if (adev->asic_type != CHIP_ARCTURUS)
3864 			/* Disable GFX parsing/prefetching */
3865 			gfx_v9_0_cp_gfx_enable(adev, false);
3866 
3867 		/* Disable MEC parsing/prefetching */
3868 		gfx_v9_0_cp_compute_enable(adev, false);
3869 
3870 		if (grbm_soft_reset) {
3871 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3872 			tmp |= grbm_soft_reset;
3873 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3874 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3875 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3876 
3877 			udelay(50);
3878 
3879 			tmp &= ~grbm_soft_reset;
3880 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3882 		}
3883 
3884 		/* Wait a little for things to settle down */
3885 		udelay(50);
3886 	}
3887 	return 0;
3888 }
3889 
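/*
 * Read the 64-bit GPU clock counter.  Under SRIOV runtime on Vega10 the
 * RLC refclock timestamp is sampled MSB/LSB/MSB until the MSB is stable to
 * avoid a torn 64-bit read; otherwise the counter is latched via
 * RLC_CAPTURE_GPU_CLOCK_COUNT and read back from the LSB/MSB registers.
 */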
3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3891 {
3892 	uint64_t clock;
3893 
3894 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3895 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3896 		uint32_t tmp, lsb, msb, i = 0;
3897 		do {
3898 			if (i != 0)
3899 				udelay(1);
3900 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3901 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3902 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3903 			i++;
3904 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3905 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3906 	} else {
3907 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3908 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3909 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3910 	}
3911 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3912 	return clock;
3913 }
3914 
3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3916 					  uint32_t vmid,
3917 					  uint32_t gds_base, uint32_t gds_size,
3918 					  uint32_t gws_base, uint32_t gws_size,
3919 					  uint32_t oa_base, uint32_t oa_size)
3920 {
3921 	struct amdgpu_device *adev = ring->adev;
3922 
3923 	/* GDS Base */
3924 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3925 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3926 				   gds_base);
3927 
3928 	/* GDS Size */
3929 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3930 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3931 				   gds_size);
3932 
3933 	/* GWS */
3934 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3935 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3936 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3937 
3938 	/* OA */
3939 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3940 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3941 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3942 }
3943 
3944 static const u32 vgpr_init_compute_shader[] =
3945 {
3946 	0xb07c0000, 0xbe8000ff,
3947 	0x000000f8, 0xbf110800,
3948 	0x7e000280, 0x7e020280,
3949 	0x7e040280, 0x7e060280,
3950 	0x7e080280, 0x7e0a0280,
3951 	0x7e0c0280, 0x7e0e0280,
3952 	0x80808800, 0xbe803200,
3953 	0xbf84fff5, 0xbf9c0000,
3954 	0xd28c0001, 0x0001007f,
3955 	0xd28d0001, 0x0002027e,
3956 	0x10020288, 0xb8810904,
3957 	0xb7814000, 0xd1196a01,
3958 	0x00000301, 0xbe800087,
3959 	0xbefc00c1, 0xd89c4000,
3960 	0x00020201, 0xd89cc080,
3961 	0x00040401, 0x320202ff,
3962 	0x00000800, 0x80808100,
3963 	0xbf84fff8, 0x7e020280,
3964 	0xbf810000, 0x00000000,
3965 };
3966 
3967 static const u32 sgpr_init_compute_shader[] =
3968 {
3969 	0xb07c0000, 0xbe8000ff,
3970 	0x0000005f, 0xbee50080,
3971 	0xbe812c65, 0xbe822c65,
3972 	0xbe832c65, 0xbe842c65,
3973 	0xbe852c65, 0xb77c0005,
3974 	0x80808500, 0xbf84fff8,
3975 	0xbe800080, 0xbf810000,
3976 };
3977 
3978 /* When the register arrays below are changed, please update gpr_reg_size
3979  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
3980  * to cover all gfx9 ASICs */
3981 static const struct soc15_reg_entry vgpr_init_regs[] = {
3982    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3983    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3984    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3989    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3990    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3991    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
3995    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
3996 };
3997 
3998 static const struct soc15_reg_entry sgpr1_init_regs[] = {
3999    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4000    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4001    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4002    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4003    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4004    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4005    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4006    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4007    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4008    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4009    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4010    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4011    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4012    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4013 };
4014 
4015 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4016    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4017    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4018    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4019    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4020    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4021    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4022    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4023    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4024    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4025    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4026    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4027    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4028    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4029    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4030 };
4031 
4032 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4033    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4034    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4035    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4036    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4037    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4038    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4039    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4040    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4041    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4042    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4043    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4044    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4045    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4046    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4047    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4048    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4049    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4050    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4051    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4052    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4053    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4054    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4055    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4056    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4057    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4058    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4059    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4060    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4061    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4062    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4063    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4064    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4065    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4066    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4067 };
4068 
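/*
 * GDS EDC workaround: issue a single DMA_DATA packet that writes zeros
 * across the whole GDS (VMID0 window) so that its ECC state starts out
 * clean.  Only runs when GFX RAS is enabled.
 */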
4069 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4070 {
4071 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4072 	int i, r;
4073 
4074 	/* only supported when RAS is enabled */
4075 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4076 		return 0;
4077 
4078 	r = amdgpu_ring_alloc(ring, 7);
4079 	if (r) {
4080 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4081 			ring->name, r);
4082 		return r;
4083 	}
4084 
4085 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4086 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4087 
4088 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4089 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4090 				PACKET3_DMA_DATA_DST_SEL(1) |
4091 				PACKET3_DMA_DATA_SRC_SEL(2) |
4092 				PACKET3_DMA_DATA_ENGINE(0)));
4093 	amdgpu_ring_write(ring, 0);
4094 	amdgpu_ring_write(ring, 0);
4095 	amdgpu_ring_write(ring, 0);
4096 	amdgpu_ring_write(ring, 0);
4097 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4098 				adev->gds.gds_size);
4099 
4100 	amdgpu_ring_commit(ring);
4101 
4102 	for (i = 0; i < adev->usec_timeout; i++) {
4103 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4104 			break;
4105 		udelay(1);
4106 	}
4107 
4108 	if (i >= adev->usec_timeout)
4109 		r = -ETIMEDOUT;
4110 
4111 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4112 
4113 	return r;
4114 }
4115 
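/*
 * GPR EDC workaround: build one IB that dispatches the VGPR init shader
 * and two SGPR init shaders across every CU, wait for the resulting fence,
 * then read back all SEC/DED counter registers (per SE and instance) so
 * the counters are cleared.  Only runs when GFX RAS is enabled and the
 * first compute ring is ready.
 */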
4116 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4117 {
4118 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4119 	struct amdgpu_ib ib;
4120 	struct dma_fence *f = NULL;
4121 	int r, i, j, k;
4122 	unsigned total_size, vgpr_offset, sgpr_offset;
4123 	u64 gpu_addr;
4124 
4125 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4126 						adev->gfx.config.max_cu_per_sh *
4127 						adev->gfx.config.max_sh_per_se;
4128 	int sgpr_work_group_size = 5;
4129 	int gpr_reg_size = compute_dim_x / 16 + 6;
4130 
4131 	/* only supported when RAS is enabled */
4132 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4133 		return 0;
4134 
4135 	/* bail if the compute ring is not ready */
4136 	if (!ring->sched.ready)
4137 		return 0;
4138 
4139 	total_size =
4140 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4141 	total_size +=
4142 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4143 	total_size +=
4144 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4145 	total_size = ALIGN(total_size, 256);
4146 	vgpr_offset = total_size;
4147 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4148 	sgpr_offset = total_size;
4149 	total_size += sizeof(sgpr_init_compute_shader);
4150 
4151 	/* allocate an indirect buffer to put the commands in */
4152 	memset(&ib, 0, sizeof(ib));
4153 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4154 	if (r) {
4155 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4156 		return r;
4157 	}
4158 
4159 	/* load the compute shaders */
4160 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4161 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4162 
4163 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4164 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4165 
4166 	/* init the ib length to 0 */
4167 	ib.length_dw = 0;
4168 
4169 	/* VGPR */
4170 	/* write the register state for the compute dispatch */
4171 	for (i = 0; i < gpr_reg_size; i++) {
4172 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4173 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4174 								- PACKET3_SET_SH_REG_START;
4175 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4176 	}
4177 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4178 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4179 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4180 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4181 							- PACKET3_SET_SH_REG_START;
4182 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4183 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4184 
4185 	/* write dispatch packet */
4186 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4187 	ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4188 	ib.ptr[ib.length_dw++] = 1; /* y */
4189 	ib.ptr[ib.length_dw++] = 1; /* z */
4190 	ib.ptr[ib.length_dw++] =
4191 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4192 
4193 	/* write CS partial flush packet */
4194 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4195 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4196 
4197 	/* SGPR1 */
4198 	/* write the register state for the compute dispatch */
4199 	for (i = 0; i < gpr_reg_size; i++) {
4200 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4201 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4202 								- PACKET3_SET_SH_REG_START;
4203 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4204 	}
4205 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4206 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4207 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4208 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4209 							- PACKET3_SET_SH_REG_START;
4210 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4211 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4212 
4213 	/* write dispatch packet */
4214 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4215 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4216 	ib.ptr[ib.length_dw++] = 1; /* y */
4217 	ib.ptr[ib.length_dw++] = 1; /* z */
4218 	ib.ptr[ib.length_dw++] =
4219 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4220 
4221 	/* write CS partial flush packet */
4222 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4223 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4224 
4225 	/* SGPR2 */
4226 	/* write the register state for the compute dispatch */
4227 	for (i = 0; i < gpr_reg_size; i++) {
4228 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4229 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4230 								- PACKET3_SET_SH_REG_START;
4231 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4232 	}
4233 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4234 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4235 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4236 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4237 							- PACKET3_SET_SH_REG_START;
4238 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4239 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4240 
4241 	/* write dispatch packet */
4242 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4243 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4244 	ib.ptr[ib.length_dw++] = 1; /* y */
4245 	ib.ptr[ib.length_dw++] = 1; /* z */
4246 	ib.ptr[ib.length_dw++] =
4247 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4248 
4249 	/* write CS partial flush packet */
4250 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4251 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4252 
4253 	/* schedule the ib on the ring */
4254 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4255 	if (r) {
4256 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4257 		goto fail;
4258 	}
4259 
4260 	/* wait for the GPU to finish processing the IB */
4261 	r = dma_fence_wait(f, false);
4262 	if (r) {
4263 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4264 		goto fail;
4265 	}
4266 
4267 	/* read back registers to clear the counters */
4268 	mutex_lock(&adev->grbm_idx_mutex);
4269 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4270 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4271 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4272 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4273 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4274 			}
4275 		}
4276 	}
4277 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4278 	mutex_unlock(&adev->grbm_idx_mutex);
4279 
4280 fail:
4281 	amdgpu_ib_free(adev, &ib, NULL);
4282 	dma_fence_put(f);
4283 
4284 	return r;
4285 }
4286 
4287 static int gfx_v9_0_early_init(void *handle)
4288 {
4289 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4290 
4291 	if (adev->asic_type == CHIP_ARCTURUS)
4292 		adev->gfx.num_gfx_rings = 0;
4293 	else
4294 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4295 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4296 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4297 	gfx_v9_0_set_ring_funcs(adev);
4298 	gfx_v9_0_set_irq_funcs(adev);
4299 	gfx_v9_0_set_gds_init(adev);
4300 	gfx_v9_0_set_rlc_funcs(adev);
4301 
4302 	return 0;
4303 }
4304 
4305 static int gfx_v9_0_ecc_late_init(void *handle)
4306 {
4307 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308 	int r;
4309 
4310 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4311 	if (r)
4312 		return r;
4313 
4314 	/* requires IBs so do in late init after IB pool is initialized */
4315 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4316 	if (r)
4317 		return r;
4318 
4319 	r = amdgpu_gfx_ras_late_init(adev);
4320 	if (r)
4321 		return r;
4322 
4323 	return 0;
4324 }
4325 
4326 static int gfx_v9_0_late_init(void *handle)
4327 {
4328 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4329 	int r;
4330 
4331 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4332 	if (r)
4333 		return r;
4334 
4335 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4336 	if (r)
4337 		return r;
4338 
4339 	r = gfx_v9_0_ecc_late_init(handle);
4340 	if (r)
4341 		return r;
4342 
4343 	return 0;
4344 }
4345 
4346 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4347 {
4348 	uint32_t rlc_setting;
4349 
4350 	/* if RLC is not enabled, do nothing */
4351 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4352 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4353 		return false;
4354 
4355 	return true;
4356 }
4357 
4358 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4359 {
4360 	uint32_t data;
4361 	unsigned i;
4362 
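	/* MESSAGE = 1 requests safe mode entry; the CMD bit latches the request
	 * and is cleared by the RLC once the request has been processed.
	 */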
4363 	data = RLC_SAFE_MODE__CMD_MASK;
4364 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4365 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4366 
4367 	/* wait for RLC_SAFE_MODE */
4368 	for (i = 0; i < adev->usec_timeout; i++) {
4369 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4370 			break;
4371 		udelay(1);
4372 	}
4373 }
4374 
4375 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4376 {
4377 	uint32_t data;
4378 
4379 	data = RLC_SAFE_MODE__CMD_MASK;
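	/* CMD set with MESSAGE = 0 asks the RLC to leave safe mode again */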
4380 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4381 }
4382 
4383 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4384 						bool enable)
4385 {
4386 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4387 
4388 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4389 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4390 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4391 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4392 	} else {
4393 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4394 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4395 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4396 	}
4397 
4398 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4399 }
4400 
4401 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4402 						bool enable)
4403 {
4404 	/* TODO: double check if we need to perform this under safe mode */
4405 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4406 
4407 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4408 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4409 	else
4410 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4411 
4412 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4413 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4414 	else
4415 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4416 
4417 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4418 }
4419 
4420 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4421 						      bool enable)
4422 {
4423 	uint32_t data, def;
4424 
4425 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4426 
4427 	/* It is disabled by HW by default */
4428 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4429 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4430 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4431 
4432 		if (adev->asic_type != CHIP_VEGA12)
4433 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4434 
4435 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4436 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4437 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4438 
4439 		/* only for Vega10 & Raven1 */
4440 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4441 
4442 		if (def != data)
4443 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4444 
4445 		/* MGLS is a global flag to control all MGLS in GFX */
4446 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4447 			/* 2 - RLC memory Light sleep */
4448 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4449 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4450 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4451 				if (def != data)
4452 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4453 			}
4454 			/* 3 - CP memory Light sleep */
4455 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4456 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4457 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4458 				if (def != data)
4459 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4460 			}
4461 		}
4462 	} else {
4463 		/* 1 - MGCG_OVERRIDE */
4464 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4465 
4466 		if (adev->asic_type != CHIP_VEGA12)
4467 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4468 
4469 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4470 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4471 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4472 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4473 
4474 		if (def != data)
4475 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4476 
4477 		/* 2 - disable MGLS in RLC */
4478 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4479 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4480 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4481 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4482 		}
4483 
4484 		/* 3 - disable MGLS in CP */
4485 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4486 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4487 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4488 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4489 		}
4490 	}
4491 
4492 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4493 }
4494 
4495 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4496 					   bool enable)
4497 {
4498 	uint32_t data, def;
4499 
4500 	if (adev->asic_type == CHIP_ARCTURUS)
4501 		return;
4502 
4503 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4504 
4505 	/* Enable 3D CGCG/CGLS */
4506 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4507 		/* write cmd to clear cgcg/cgls override */
4508 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4509 		/* unset CGCG override */
4510 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4511 		/* update CGCG and CGLS override bits */
4512 		if (def != data)
4513 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4514 
4515 		/* enable 3Dcgcg FSM(0x0000363f) */
4516 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4517 
4518 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4519 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4520 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4521 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4522 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4523 		if (def != data)
4524 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4525 
4526 		/* set IDLE_POLL_COUNT(0x00900100) */
4527 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4528 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4529 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4530 		if (def != data)
4531 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4532 	} else {
4533 		/* Disable CGCG/CGLS */
4534 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4535 		/* disable cgcg, cgls should be disabled */
4536 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4537 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4538 		/* disable cgcg and cgls in FSM */
4539 		if (def != data)
4540 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4541 	}
4542 
4543 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4544 }
4545 
4546 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4547 						      bool enable)
4548 {
4549 	uint32_t def, data;
4550 
4551 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4552 
4553 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4554 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4555 		/* unset CGCG override */
4556 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4557 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4558 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4559 		else
4560 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4561 		/* update CGCG and CGLS override bits */
4562 		if (def != data)
4563 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4564 
4565 		/* enable cgcg FSM(0x0000363F) */
4566 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4567 
4568 		if (adev->asic_type == CHIP_ARCTURUS)
4569 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4570 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4571 		else
4572 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4573 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4574 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4575 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4576 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4577 		if (def != data)
4578 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4579 
4580 		/* set IDLE_POLL_COUNT(0x00900100) */
4581 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4582 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4583 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4584 		if (def != data)
4585 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4586 	} else {
4587 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4588 		/* reset CGCG/CGLS bits */
4589 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4590 		/* disable cgcg and cgls in FSM */
4591 		if (def != data)
4592 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4593 	}
4594 
4595 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4596 }
4597 
4598 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4599 					    bool enable)
4600 {
4601 	if (enable) {
4602 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4603 		 * ===  MGCG + MGLS ===
4604 		 */
4605 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4606 		/* ===  CGCG /CGLS for GFX 3D Only === */
4607 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4608 		/* ===  CGCG + CGLS === */
4609 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4610 	} else {
4611 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4612 		 * ===  CGCG + CGLS ===
4613 		 */
4614 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4615 		/* ===  CGCG /CGLS for GFX 3D Only === */
4616 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4617 		/* ===  MGCG + MGLS === */
4618 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4619 	}
4620 	return 0;
4621 }
4622 
4623 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4624 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4625 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4626 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4627 	.init = gfx_v9_0_rlc_init,
4628 	.get_csb_size = gfx_v9_0_get_csb_size,
4629 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4630 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4631 	.resume = gfx_v9_0_rlc_resume,
4632 	.stop = gfx_v9_0_rlc_stop,
4633 	.reset = gfx_v9_0_rlc_reset,
4634 	.start = gfx_v9_0_rlc_start
4635 };
4636 
4637 static int gfx_v9_0_set_powergating_state(void *handle,
4638 					  enum amd_powergating_state state)
4639 {
4640 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4641 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4642 
4643 	switch (adev->asic_type) {
4644 	case CHIP_RAVEN:
4645 	case CHIP_RENOIR:
4646 		if (!enable) {
4647 			amdgpu_gfx_off_ctrl(adev, false);
4648 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4649 		}
4650 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4651 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4652 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4653 		} else {
4654 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4655 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4656 		}
4657 
4658 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4659 			gfx_v9_0_enable_cp_power_gating(adev, true);
4660 		else
4661 			gfx_v9_0_enable_cp_power_gating(adev, false);
4662 
4663 		/* update gfx cgpg state */
4664 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4665 
4666 		/* update mgcg state */
4667 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4668 
4669 		if (enable)
4670 			amdgpu_gfx_off_ctrl(adev, true);
4671 		break;
4672 	case CHIP_VEGA12:
4673 		if (!enable) {
4674 			amdgpu_gfx_off_ctrl(adev, false);
4675 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4676 		} else {
4677 			amdgpu_gfx_off_ctrl(adev, true);
4678 		}
4679 		break;
4680 	default:
4681 		break;
4682 	}
4683 
4684 	return 0;
4685 }
4686 
4687 static int gfx_v9_0_set_clockgating_state(void *handle,
4688 					  enum amd_clockgating_state state)
4689 {
4690 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4691 
4692 	if (amdgpu_sriov_vf(adev))
4693 		return 0;
4694 
4695 	switch (adev->asic_type) {
4696 	case CHIP_VEGA10:
4697 	case CHIP_VEGA12:
4698 	case CHIP_VEGA20:
4699 	case CHIP_RAVEN:
4700 	case CHIP_ARCTURUS:
4701 	case CHIP_RENOIR:
4702 		gfx_v9_0_update_gfx_clock_gating(adev,
4703 						 state == AMD_CG_STATE_GATE ? true : false);
4704 		break;
4705 	default:
4706 		break;
4707 	}
4708 	return 0;
4709 }
4710 
4711 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4712 {
4713 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4714 	int data;
4715 
4716 	if (amdgpu_sriov_vf(adev))
4717 		*flags = 0;
4718 
4719 	/* AMD_CG_SUPPORT_GFX_MGCG */
4720 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4721 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4722 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4723 
4724 	/* AMD_CG_SUPPORT_GFX_CGCG */
4725 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4726 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4727 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4728 
4729 	/* AMD_CG_SUPPORT_GFX_CGLS */
4730 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4731 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4732 
4733 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4734 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4735 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4736 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4737 
4738 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4739 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4740 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4741 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4742 
4743 	if (adev->asic_type != CHIP_ARCTURUS) {
4744 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4745 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4746 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4747 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4748 
4749 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4750 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4751 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4752 	}
4753 }
4754 
4755 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4756 {
4757 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4758 }
4759 
4760 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4761 {
4762 	struct amdgpu_device *adev = ring->adev;
4763 	u64 wptr;
4764 
4765 	/* XXX check if swapping is necessary on BE */
4766 	if (ring->use_doorbell) {
4767 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4768 	} else {
4769 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4770 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4771 	}
4772 
4773 	return wptr;
4774 }
4775 
4776 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4777 {
4778 	struct amdgpu_device *adev = ring->adev;
4779 
4780 	if (ring->use_doorbell) {
4781 		/* XXX check if swapping is necessary on BE */
4782 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4783 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4784 	} else {
4785 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4786 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4787 	}
4788 }
4789 
4790 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4791 {
4792 	struct amdgpu_device *adev = ring->adev;
4793 	u32 ref_and_mask, reg_mem_engine;
4794 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4795 
4796 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4797 		switch (ring->me) {
4798 		case 1:
4799 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4800 			break;
4801 		case 2:
4802 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4803 			break;
4804 		default:
4805 			return;
4806 		}
4807 		reg_mem_engine = 0;
4808 	} else {
4809 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4810 		reg_mem_engine = 1; /* pfp */
4811 	}
4812 
4813 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4814 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4815 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4816 			      ref_and_mask, ref_and_mask, 0x20);
4817 }
4818 
4819 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4820 					struct amdgpu_job *job,
4821 					struct amdgpu_ib *ib,
4822 					uint32_t flags)
4823 {
4824 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4825 	u32 header, control = 0;
4826 
4827 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4828 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4829 	else
4830 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4831 
4832 	control |= ib->length_dw | (vmid << 24);
4833 
4834 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4835 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4836 
4837 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4838 			gfx_v9_0_ring_emit_de_meta(ring);
4839 	}
4840 
4841 	amdgpu_ring_write(ring, header);
4842 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4843 	amdgpu_ring_write(ring,
4844 #ifdef __BIG_ENDIAN
4845 		(2 << 0) |
4846 #endif
4847 		lower_32_bits(ib->gpu_addr));
4848 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4849 	amdgpu_ring_write(ring, control);
4850 }
4851 
4852 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4853 					  struct amdgpu_job *job,
4854 					  struct amdgpu_ib *ib,
4855 					  uint32_t flags)
4856 {
4857 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4858 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4859 
4860 	/* Currently, there is a high possibility to get wave ID mismatch
4861 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4862 	 * different wave IDs than the GDS expects. This situation happens
4863 	 * randomly when at least 5 compute pipes use GDS ordered append.
4864 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4865 	 * Those are probably bugs somewhere else in the kernel driver.
4866 	 *
4867 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4868 	 * GDS to 0 for this ring (me/pipe).
4869 	 */
4870 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4871 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4872 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4873 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4874 	}
4875 
4876 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4877 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4878 	amdgpu_ring_write(ring,
4879 #ifdef __BIG_ENDIAN
4880 				(2 << 0) |
4881 #endif
4882 				lower_32_bits(ib->gpu_addr));
4883 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4884 	amdgpu_ring_write(ring, control);
4885 }
4886 
4887 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4888 				     u64 seq, unsigned flags)
4889 {
4890 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4891 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4892 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4893 
4894 	/* RELEASE_MEM - flush caches, send int */
4895 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4896 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4897 					       EOP_TC_NC_ACTION_EN) :
4898 					      (EOP_TCL1_ACTION_EN |
4899 					       EOP_TC_ACTION_EN |
4900 					       EOP_TC_WB_ACTION_EN |
4901 					       EOP_TC_MD_ACTION_EN)) |
4902 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4903 				 EVENT_INDEX(5)));
4904 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4905 
4906 	/*
4907 	 * the address should be Qword aligned if 64bit write, Dword
4908 	 * aligned if only send 32bit data low (discard data high)
4909 	 */
4910 	if (write64bit)
4911 		BUG_ON(addr & 0x7);
4912 	else
4913 		BUG_ON(addr & 0x3);
4914 	amdgpu_ring_write(ring, lower_32_bits(addr));
4915 	amdgpu_ring_write(ring, upper_32_bits(addr));
4916 	amdgpu_ring_write(ring, lower_32_bits(seq));
4917 	amdgpu_ring_write(ring, upper_32_bits(seq));
4918 	amdgpu_ring_write(ring, 0);
4919 }
4920 
4921 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4922 {
4923 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4924 	uint32_t seq = ring->fence_drv.sync_seq;
4925 	uint64_t addr = ring->fence_drv.gpu_addr;
4926 
4927 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4928 			      lower_32_bits(addr), upper_32_bits(addr),
4929 			      seq, 0xffffffff, 4);
4930 }
4931 
4932 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4933 					unsigned vmid, uint64_t pd_addr)
4934 {
4935 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4936 
4937 	/* compute doesn't have PFP */
4938 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4939 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4940 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4941 		amdgpu_ring_write(ring, 0x0);
4942 	}
4943 }
4944 
4945 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4946 {
4947 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4948 }
4949 
4950 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4951 {
4952 	u64 wptr;
4953 
4954 	/* XXX check if swapping is necessary on BE */
4955 	if (ring->use_doorbell)
4956 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4957 	else
4958 		BUG();
4959 	return wptr;
4960 }
4961 
4962 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4963 					   bool acquire)
4964 {
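	/* Adjust SPI_WCL_PIPE_PERCENT for this pipe: a reserved pipe gets the
	 * full wave-launch budget, an unreserved one is throttled to a
	 * near-minimum value.
	 */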
4965 	struct amdgpu_device *adev = ring->adev;
4966 	int pipe_num, tmp, reg;
4967 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4968 
4969 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4970 
4971 	/* first me only has 2 entries, GFX and HP3D */
4972 	if (ring->me > 0)
4973 		pipe_num -= 2;
4974 
4975 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4976 	tmp = RREG32(reg);
4977 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4978 	WREG32(reg, tmp);
4979 }
4980 
4981 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4982 					    struct amdgpu_ring *ring,
4983 					    bool acquire)
4984 {
4985 	int i, pipe;
4986 	bool reserve;
4987 	struct amdgpu_ring *iring;
4988 
4989 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4990 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4991 	if (acquire)
4992 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4993 	else
4994 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4995 
4996 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4997 		/* Clear all reservations - everyone reacquires all resources */
4998 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4999 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5000 						       true);
5001 
5002 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5003 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5004 						       true);
5005 	} else {
5006 		/* Lower all pipes without a current reservation */
5007 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5008 			iring = &adev->gfx.gfx_ring[i];
5009 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5010 							   iring->me,
5011 							   iring->pipe,
5012 							   0);
5013 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5014 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5015 		}
5016 
5017 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5018 			iring = &adev->gfx.compute_ring[i];
5019 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5020 							   iring->me,
5021 							   iring->pipe,
5022 							   0);
5023 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5024 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5025 		}
5026 	}
5027 
5028 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5029 }
5030 
5031 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5032 				      struct amdgpu_ring *ring,
5033 				      bool acquire)
5034 {
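	/* acquire raises the HQD to high pipe priority (2) and maximum queue
	 * priority (0xf); release drops both back to the lowest level.
	 */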
5035 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5036 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5037 
5038 	mutex_lock(&adev->srbm_mutex);
5039 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5040 
5041 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5042 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5043 
5044 	soc15_grbm_select(adev, 0, 0, 0, 0);
5045 	mutex_unlock(&adev->srbm_mutex);
5046 }
5047 
5048 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5049 					       enum drm_sched_priority priority)
5050 {
5051 	struct amdgpu_device *adev = ring->adev;
5052 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5053 
5054 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5055 		return;
5056 
5057 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5058 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5059 }
5060 
5061 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5062 {
5063 	struct amdgpu_device *adev = ring->adev;
5064 
5065 	/* XXX check if swapping is necessary on BE */
5066 	if (ring->use_doorbell) {
5067 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5068 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5069 	} else {
5070 		BUG(); /* only DOORBELL method supported on gfx9 now */
5071 	}
5072 }
5073 
5074 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5075 					 u64 seq, unsigned int flags)
5076 {
5077 	struct amdgpu_device *adev = ring->adev;
5078 
5079 	/* we only allocate 32bit for each seq wb address */
5080 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5081 
5082 	/* write fence seq to the "addr" */
5083 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5084 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5085 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5086 	amdgpu_ring_write(ring, lower_32_bits(addr));
5087 	amdgpu_ring_write(ring, upper_32_bits(addr));
5088 	amdgpu_ring_write(ring, lower_32_bits(seq));
5089 
5090 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5091 		/* set register to trigger INT */
5092 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5093 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5094 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5095 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5096 		amdgpu_ring_write(ring, 0);
5097 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5098 	}
5099 }
5100 
5101 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5102 {
5103 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5104 	amdgpu_ring_write(ring, 0);
5105 }
5106 
5107 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5108 {
5109 	struct v9_ce_ib_state ce_payload = {0};
5110 	uint64_t csa_addr;
5111 	int cnt;
5112 
5113 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5114 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5115 
5116 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5117 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5118 				 WRITE_DATA_DST_SEL(8) |
5119 				 WR_CONFIRM) |
5120 				 WRITE_DATA_CACHE_POLICY(0));
5121 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5122 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5123 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5124 }
5125 
5126 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5127 {
5128 	struct v9_de_ib_state de_payload = {0};
5129 	uint64_t csa_addr, gds_addr;
5130 	int cnt;
5131 
5132 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5133 	gds_addr = csa_addr + 4096;
5134 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5135 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5136 
5137 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5138 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5139 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5140 				 WRITE_DATA_DST_SEL(8) |
5141 				 WR_CONFIRM) |
5142 				 WRITE_DATA_CACHE_POLICY(0));
5143 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5144 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5145 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5146 }
5147 
5148 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5149 {
5150 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5151 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5152 }
5153 
5154 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5155 {
5156 	uint32_t dw2 = 0;
5157 
5158 	if (amdgpu_sriov_vf(ring->adev))
5159 		gfx_v9_0_ring_emit_ce_meta(ring);
5160 
5161 	gfx_v9_0_ring_emit_tmz(ring, true);
5162 
5163 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5164 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5165 		/* set load_global_config & load_global_uconfig */
5166 		dw2 |= 0x8001;
5167 		/* set load_cs_sh_regs */
5168 		dw2 |= 0x01000000;
5169 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5170 		dw2 |= 0x10002;
5171 
5172 		/* set load_ce_ram if preamble presented */
5173 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5174 			dw2 |= 0x10000000;
5175 	} else {
5176 		/* still load_ce_ram if this is the first time the preamble is
5177 		 * presented, even though no context switch happens.
5178 		 */
5179 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5180 			dw2 |= 0x10000000;
5181 	}
5182 
5183 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5184 	amdgpu_ring_write(ring, dw2);
5185 	amdgpu_ring_write(ring, 0);
5186 }
5187 
5188 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5189 {
5190 	unsigned ret;
5191 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5192 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5193 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5194 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5195 	ret = ring->wptr & ring->buf_mask;
5196 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5197 	return ret;
5198 }
5199 
5200 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5201 {
5202 	unsigned cur;
5203 	BUG_ON(offset > ring->buf_mask);
5204 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5205 
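	/* replace the dummy count written by init_cond_exec with the number of
	 * dwords emitted since, so the CP knows how many dwords to discard when
	 * the condition is false; handles ring buffer wrap-around.
	 */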
5206 	cur = (ring->wptr & ring->buf_mask) - 1;
5207 	if (likely(cur > offset))
5208 		ring->ring[offset] = cur - offset;
5209 	else
5210 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5211 }
5212 
5213 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5214 {
5215 	struct amdgpu_device *adev = ring->adev;
5216 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5217 
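	/* COPY_DATA the requested register into this KIQ's writeback slot; the
	 * indirect register read path waits on and returns that value.
	 */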
5218 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5219 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5220 				(5 << 8) |	/* dst: memory */
5221 				(1 << 20));	/* write confirm */
5222 	amdgpu_ring_write(ring, reg);
5223 	amdgpu_ring_write(ring, 0);
5224 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5225 				kiq->reg_val_offs * 4));
5226 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5227 				kiq->reg_val_offs * 4));
5228 }
5229 
5230 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5231 				    uint32_t val)
5232 {
5233 	uint32_t cmd = 0;
5234 
5235 	switch (ring->funcs->type) {
5236 	case AMDGPU_RING_TYPE_GFX:
5237 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5238 		break;
5239 	case AMDGPU_RING_TYPE_KIQ:
5240 		cmd = (1 << 16); /* no inc addr */
5241 		break;
5242 	default:
5243 		cmd = WR_CONFIRM;
5244 		break;
5245 	}
5246 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5247 	amdgpu_ring_write(ring, cmd);
5248 	amdgpu_ring_write(ring, reg);
5249 	amdgpu_ring_write(ring, 0);
5250 	amdgpu_ring_write(ring, val);
5251 }
5252 
5253 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5254 					uint32_t val, uint32_t mask)
5255 {
5256 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5257 }
5258 
5259 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5260 						  uint32_t reg0, uint32_t reg1,
5261 						  uint32_t ref, uint32_t mask)
5262 {
5263 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5264 	struct amdgpu_device *adev = ring->adev;
5265 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5266 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5267 
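	/* newer CP firmware can fuse the register write and the wait into a
	 * single WAIT_REG_MEM packet; otherwise fall back to the generic
	 * write + wait helper.
	 */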
5268 	if (fw_version_ok)
5269 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5270 				      ref, mask, 0x20);
5271 	else
5272 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5273 							   ref, mask);
5274 }
5275 
5276 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5277 {
5278 	struct amdgpu_device *adev = ring->adev;
5279 	uint32_t value = 0;
5280 
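	/* CMD 0x03 is the SQ kill command; with CHECK_VMID set it terminates
	 * the waves of the offending VMID as a soft recovery attempt.
	 */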
5281 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5282 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5283 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5284 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5285 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5286 }
5287 
5288 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5289 						 enum amdgpu_interrupt_state state)
5290 {
5291 	switch (state) {
5292 	case AMDGPU_IRQ_STATE_DISABLE:
5293 	case AMDGPU_IRQ_STATE_ENABLE:
5294 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5295 			       TIME_STAMP_INT_ENABLE,
5296 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5297 		break;
5298 	default:
5299 		break;
5300 	}
5301 }
5302 
5303 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5304 						     int me, int pipe,
5305 						     enum amdgpu_interrupt_state state)
5306 {
5307 	u32 mec_int_cntl, mec_int_cntl_reg;
5308 
5309 	/*
5310 	 * amdgpu controls only the first MEC. That's why this function only
5311 	 * handles the setting of interrupts for this specific MEC. All other
5312 	 * pipes' interrupts are set by amdkfd.
5313 	 */
5314 
5315 	if (me == 1) {
5316 		switch (pipe) {
5317 		case 0:
5318 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5319 			break;
5320 		case 1:
5321 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5322 			break;
5323 		case 2:
5324 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5325 			break;
5326 		case 3:
5327 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5328 			break;
5329 		default:
5330 			DRM_DEBUG("invalid pipe %d\n", pipe);
5331 			return;
5332 		}
5333 	} else {
5334 		DRM_DEBUG("invalid me %d\n", me);
5335 		return;
5336 	}
5337 
5338 	switch (state) {
5339 	case AMDGPU_IRQ_STATE_DISABLE:
5340 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5341 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5342 					     TIME_STAMP_INT_ENABLE, 0);
5343 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5344 		break;
5345 	case AMDGPU_IRQ_STATE_ENABLE:
5346 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5347 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5348 					     TIME_STAMP_INT_ENABLE, 1);
5349 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5350 		break;
5351 	default:
5352 		break;
5353 	}
5354 }
5355 
5356 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5357 					     struct amdgpu_irq_src *source,
5358 					     unsigned type,
5359 					     enum amdgpu_interrupt_state state)
5360 {
5361 	switch (state) {
5362 	case AMDGPU_IRQ_STATE_DISABLE:
5363 	case AMDGPU_IRQ_STATE_ENABLE:
5364 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5365 			       PRIV_REG_INT_ENABLE,
5366 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5367 		break;
5368 	default:
5369 		break;
5370 	}
5371 
5372 	return 0;
5373 }
5374 
5375 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5376 					      struct amdgpu_irq_src *source,
5377 					      unsigned type,
5378 					      enum amdgpu_interrupt_state state)
5379 {
5380 	switch (state) {
5381 	case AMDGPU_IRQ_STATE_DISABLE:
5382 	case AMDGPU_IRQ_STATE_ENABLE:
5383 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5384 			       PRIV_INSTR_INT_ENABLE,
5385 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5386 	default:
5387 		break;
5388 	}
5389 
5390 	return 0;
5391 }
5392 
5393 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5394 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5395 			CP_ECC_ERROR_INT_ENABLE, 1)
5396 
5397 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5398 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5399 			CP_ECC_ERROR_INT_ENABLE, 0)
5400 
5401 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5402 					      struct amdgpu_irq_src *source,
5403 					      unsigned type,
5404 					      enum amdgpu_interrupt_state state)
5405 {
5406 	switch (state) {
5407 	case AMDGPU_IRQ_STATE_DISABLE:
5408 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5409 				CP_ECC_ERROR_INT_ENABLE, 0);
5410 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5411 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5412 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5413 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5414 		break;
5415 
5416 	case AMDGPU_IRQ_STATE_ENABLE:
5417 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5418 				CP_ECC_ERROR_INT_ENABLE, 1);
5419 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5420 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5421 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5422 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5423 		break;
5424 	default:
5425 		break;
5426 	}
5427 
5428 	return 0;
5429 }
5430 
5431 
5432 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5433 					    struct amdgpu_irq_src *src,
5434 					    unsigned type,
5435 					    enum amdgpu_interrupt_state state)
5436 {
5437 	switch (type) {
5438 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5439 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5440 		break;
5441 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5442 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5443 		break;
5444 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5445 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5446 		break;
5447 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5448 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5449 		break;
5450 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5451 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5452 		break;
5453 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5454 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5455 		break;
5456 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5457 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5458 		break;
5459 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5460 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5461 		break;
5462 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5463 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5464 		break;
5465 	default:
5466 		break;
5467 	}
5468 	return 0;
5469 }
5470 
5471 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5472 			    struct amdgpu_irq_src *source,
5473 			    struct amdgpu_iv_entry *entry)
5474 {
5475 	int i;
5476 	u8 me_id, pipe_id, queue_id;
5477 	struct amdgpu_ring *ring;
5478 
5479 	DRM_DEBUG("IH: CP EOP\n");
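	/* ring_id from the IH entry encodes the source ring: pipe in bits
	 * [1:0], me in bits [3:2] and queue in bits [6:4]
	 */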
5480 	me_id = (entry->ring_id & 0x0c) >> 2;
5481 	pipe_id = (entry->ring_id & 0x03) >> 0;
5482 	queue_id = (entry->ring_id & 0x70) >> 4;
5483 
5484 	switch (me_id) {
5485 	case 0:
5486 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5487 		break;
5488 	case 1:
5489 	case 2:
5490 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5491 			ring = &adev->gfx.compute_ring[i];
5492 			/* Per-queue interrupt is supported for MEC starting from VI.
5493 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5494 			 */
5495 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5496 				amdgpu_fence_process(ring);
5497 		}
5498 		break;
5499 	}
5500 	return 0;
5501 }
5502 
5503 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5504 			   struct amdgpu_iv_entry *entry)
5505 {
5506 	u8 me_id, pipe_id, queue_id;
5507 	struct amdgpu_ring *ring;
5508 	int i;
5509 
5510 	me_id = (entry->ring_id & 0x0c) >> 2;
5511 	pipe_id = (entry->ring_id & 0x03) >> 0;
5512 	queue_id = (entry->ring_id & 0x70) >> 4;
5513 
5514 	switch (me_id) {
5515 	case 0:
5516 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5517 		break;
5518 	case 1:
5519 	case 2:
5520 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5521 			ring = &adev->gfx.compute_ring[i];
5522 			if (ring->me == me_id && ring->pipe == pipe_id &&
5523 			    ring->queue == queue_id)
5524 				drm_sched_fault(&ring->sched);
5525 		}
5526 		break;
5527 	}
5528 }
5529 
5530 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5531 				 struct amdgpu_irq_src *source,
5532 				 struct amdgpu_iv_entry *entry)
5533 {
5534 	DRM_ERROR("Illegal register access in command stream\n");
5535 	gfx_v9_0_fault(adev, entry);
5536 	return 0;
5537 }
5538 
5539 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5540 				  struct amdgpu_irq_src *source,
5541 				  struct amdgpu_iv_entry *entry)
5542 {
5543 	DRM_ERROR("Illegal instruction in command stream\n");
5544 	gfx_v9_0_fault(adev, entry);
5545 	return 0;
5546 }
5547 
5548 
5549 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5550 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5551 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5552 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5553 	},
5554 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5555 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5556 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5557 	},
5558 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5559 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5560 	  0, 0
5561 	},
5562 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5563 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5564 	  0, 0
5565 	},
5566 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5567 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5568 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5569 	},
5570 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5571 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5572 	  0, 0
5573 	},
5574 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5575 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5576 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5577 	},
5578 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5579 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5580 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5581 	},
5582 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5583 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5584 	  0, 0
5585 	},
5586 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5587 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5588 	  0, 0
5589 	},
5590 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5591 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5592 	  0, 0
5593 	},
5594 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5595 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5596 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5597 	},
5598 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5599 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5600 	  0, 0
5601 	},
5602 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5603 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5604 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5605 	},
5606 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5607 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5608 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5609 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5610 	},
5611 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5612 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5613 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5614 	  0, 0
5615 	},
5616 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5617 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5618 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5619 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5620 	},
5621 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5622 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5623 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5624 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5625 	},
5626 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5627 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5628 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5629 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5630 	},
5631 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5632 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5633 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5634 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5635 	},
5636 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5637 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5638 	  0, 0
5639 	},
5640 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5641 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5642 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5643 	},
5644 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5645 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5646 	  0, 0
5647 	},
5648 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5649 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5650 	  0, 0
5651 	},
5652 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5653 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5654 	  0, 0
5655 	},
5656 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5657 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5658 	  0, 0
5659 	},
5660 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5661 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5662 	  0, 0
5663 	},
5664 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5665 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5666 	  0, 0
5667 	},
5668 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5669 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5670 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5671 	},
5672 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5673 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5674 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5675 	},
5676 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5677 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5678 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5679 	},
5680 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5681 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5682 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5683 	},
5684 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5685 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5686 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5687 	},
5688 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5689 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5690 	  0, 0
5691 	},
5692 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5693 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5694 	  0, 0
5695 	},
5696 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5697 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5698 	  0, 0
5699 	},
5700 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5701 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5702 	  0, 0
5703 	},
5704 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5705 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5706 	  0, 0
5707 	},
5708 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5709 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5710 	  0, 0
5711 	},
5712 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5713 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5714 	  0, 0
5715 	},
5716 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5717 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5718 	  0, 0
5719 	},
5720 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5721 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5722 	  0, 0
5723 	},
5724 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5725 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5726 	  0, 0
5727 	},
5728 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5729 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5730 	  0, 0
5731 	},
5732 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5733 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5734 	  0, 0
5735 	},
5736 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5737 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5738 	  0, 0
5739 	},
5740 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5741 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5742 	  0, 0
5743 	},
5744 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5745 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5746 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5747 	},
5748 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5749 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5750 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5751 	},
5752 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5753 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5754 	  0, 0
5755 	},
5756 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5757 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5758 	  0, 0
5759 	},
5760 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5761 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5762 	  0, 0
5763 	},
5764 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5765 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5766 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5767 	},
5768 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5769 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5770 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5771 	},
5772 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5773 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5774 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5775 	},
5776 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5777 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5778 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5779 	},
5780 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5781 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5782 	  0, 0
5783 	},
5784 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5785 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5786 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5787 	},
5788 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5789 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5790 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5791 	},
5792 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5793 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5794 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5795 	},
5796 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5797 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5798 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5799 	},
5800 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5801 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5802 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5803 	},
5804 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5805 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5806 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5807 	},
5808 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5809 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5810 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5811 	},
5812 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5813 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5814 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5815 	},
5816 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5817 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5818 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5819 	},
5820 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5821 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5822 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5823 	},
5824 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5825 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5826 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5827 	},
5828 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5829 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5830 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5831 	},
5832 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5833 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5834 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5835 	},
5836 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5837 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5838 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5839 	},
5840 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5841 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5842 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5843 	},
5844 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5845 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5846 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5847 	},
5848 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5849 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5850 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5851 	},
5852 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5853 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5854 	  0, 0
5855 	},
5856 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5857 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5858 	  0, 0
5859 	},
5860 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5861 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5862 	  0, 0
5863 	},
5864 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5865 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5866 	  0, 0
5867 	},
5868 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5869 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5870 	  0, 0
5871 	},
5872 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5873 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5874 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5875 	},
5876 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5877 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5878 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5879 	},
5880 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5881 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5882 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5883 	},
5884 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5885 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5886 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5887 	},
5888 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5889 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5890 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5891 	},
5892 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5893 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5894 	  0, 0
5895 	},
5896 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5897 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5898 	  0, 0
5899 	},
5900 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5901 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5902 	  0, 0
5903 	},
5904 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5905 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5906 	  0, 0
5907 	},
5908 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5909 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5910 	  0, 0
5911 	},
5912 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5913 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5914 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5915 	},
5916 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5917 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5918 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5919 	},
5920 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5921 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5922 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5923 	},
5924 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5925 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5926 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5927 	},
5928 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5929 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5930 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5931 	},
5932 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5933 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5934 	  0, 0
5935 	},
5936 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5937 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5938 	  0, 0
5939 	},
5940 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5941 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5942 	  0, 0
5943 	},
5944 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5945 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5946 	  0, 0
5947 	},
5948 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5949 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5950 	  0, 0
5951 	},
5952 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5953 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5954 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5955 	},
5956 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5957 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5958 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5959 	},
5960 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5961 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5962 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5963 	},
5964 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5965 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5966 	  0, 0
5967 	},
5968 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5969 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5970 	  0, 0
5971 	},
5972 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5973 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5974 	  0, 0
5975 	},
5976 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5977 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5978 	  0, 0
5979 	},
5980 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5981 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5982 	  0, 0
5983 	},
5984 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5985 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5986 	  0, 0
5987 	}
5988 };
5989 
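/*
 * Each entry above pairs a sub-block name with its EDC count register and
 * the SEC/DED (or SED-only) count fields used to decode it; the table is
 * consumed by __get_ras_error_count() below.
 */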
5990 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5991 				     void *inject_if)
5992 {
5993 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5994 	int ret;
5995 	struct ta_ras_trigger_error_input block_info = { 0 };
5996 
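	/*
	 * Only Vega20 supports RAS error injection here; validate the
	 * sub-block and error type, then hand the request to the PSP RAS TA.
	 */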
5997 	if (adev->asic_type != CHIP_VEGA20)
5998 		return -EINVAL;
5999 
6000 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6001 		return -EINVAL;
6002 
6003 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6004 		return -EPERM;
6005 
6006 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6007 	      info->head.type)) {
6008 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6009 			ras_gfx_subblocks[info->head.sub_block_index].name,
6010 			info->head.type);
6011 		return -EPERM;
6012 	}
6013 
6014 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6015 	      info->head.type)) {
6016 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6017 			ras_gfx_subblocks[info->head.sub_block_index].name,
6018 			info->head.type);
6019 		return -EPERM;
6020 	}
6021 
6022 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6023 	block_info.sub_block_index =
6024 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6025 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6026 	block_info.address = info->address;
6027 	block_info.value = info->value;
6028 
6029 	mutex_lock(&adev->grbm_idx_mutex);
6030 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6031 	mutex_unlock(&adev->grbm_idx_mutex);
6032 
6033 	return ret;
6034 }
6035 
6036 static const char *vml2_mems[] = {
6037 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6038 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6039 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6040 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6041 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6042 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6043 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6044 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6045 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6046 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6047 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6048 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6049 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6050 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6051 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6052 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6053 };
6054 
6055 static const char *vml2_walker_mems[] = {
6056 	"UTC_VML2_CACHE_PDE0_MEM0",
6057 	"UTC_VML2_CACHE_PDE0_MEM1",
6058 	"UTC_VML2_CACHE_PDE1_MEM0",
6059 	"UTC_VML2_CACHE_PDE1_MEM1",
6060 	"UTC_VML2_CACHE_PDE2_MEM0",
6061 	"UTC_VML2_CACHE_PDE2_MEM1",
6062 	"UTC_VML2_RDIF_LOG_FIFO",
6063 };
6064 
6065 static const char *atc_l2_cache_2m_mems[] = {
6066 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6067 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6068 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6069 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6070 };
6071 
6072 static const char *atc_l2_cache_4k_mems[] = {
6073 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6074 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6075 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6076 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6077 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6078 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6079 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6080 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6081 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6082 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6083 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6084 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6085 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6086 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6087 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6088 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6089 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6090 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6091 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6092 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6093 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6094 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6095 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6096 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6097 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6098 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6099 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6100 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6101 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6102 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6103 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6104 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6105 };
6106 
6107 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6108 					 struct ras_err_data *err_data)
6109 {
6110 	uint32_t i, data;
6111 	uint32_t sec_count, ded_count;
6112 
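	/* Clear the VML2 and ATC L2 EDC counters before sampling each instance. */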
6113 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6114 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6115 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6116 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6117 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6118 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6119 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6120 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6121 
6122 	for (i = 0; i < 16; i++) {
6123 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6124 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6125 
6126 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6127 		if (sec_count) {
6128 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6129 				 vml2_mems[i], sec_count);
6130 			err_data->ce_count += sec_count;
6131 		}
6132 
6133 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6134 		if (ded_count) {
6135 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6136 				 vml2_mems[i], ded_count);
6137 			err_data->ue_count += ded_count;
6138 		}
6139 	}
6140 
6141 	for (i = 0; i < 7; i++) {
6142 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6143 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6144 
6145 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6146 						SEC_COUNT);
6147 		if (sec_count) {
6148 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6149 				 vml2_walker_mems[i], sec_count);
6150 			err_data->ce_count += sec_count;
6151 		}
6152 
6153 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6154 						DED_COUNT);
6155 		if (ded_count) {
6156 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6157 				 vml2_walker_mems[i], ded_count);
6158 			err_data->ue_count += ded_count;
6159 		}
6160 	}
6161 
6162 	for (i = 0; i < 4; i++) {
6163 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6164 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6165 
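		/* SEC_COUNT is hand-decoded from bits 14:13 of ATC_L2_CACHE_2M_EDC_CNT. */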
6166 		sec_count = (data & 0x00006000L) >> 0xd;
6167 		if (sec_count) {
6168 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6169 				 atc_l2_cache_2m_mems[i], sec_count);
6170 			err_data->ce_count += sec_count;
6171 		}
6172 	}
6173 
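	/*
	 * For the 4K cache the fields are decoded by hand as well:
	 * SEC_COUNT in bits 14:13, DED_COUNT in bits 16:15 of
	 * ATC_L2_CACHE_4K_EDC_CNT.
	 */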
6174 	for (i = 0; i < 32; i++) {
6175 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6176 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6177 
6178 		sec_count = (data & 0x00006000L) >> 0xd;
6179 		if (sec_count) {
6180 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6181 				 atc_l2_cache_4k_mems[i], sec_count);
6182 			err_data->ce_count += sec_count;
6183 		}
6184 
6185 		ded_count = (data & 0x00018000L) >> 0xf;
6186 		if (ded_count) {
6187 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6188 				 atc_l2_cache_4k_mems[i], ded_count);
6189 			err_data->ue_count += ded_count;
6190 		}
6191 	}
6192 
6193 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6194 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6195 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6196 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6197 
6198 	return 0;
6199 }
6200 
6201 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6202 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6203 	uint32_t *sec_count, uint32_t *ded_count)
6204 {
6205 	uint32_t i;
6206 	uint32_t sec_cnt, ded_cnt;
6207 
6208 	for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
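	/*
	 * Accumulate SEC/DED counts for every field in gc_ras_fields_vg20
	 * that belongs to the register which produced @value.
	 */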
6209 		if (gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6210 			gc_ras_fields_vg20[i].seg != reg->seg ||
6211 			gc_ras_fields_vg20[i].inst != reg->inst)
6212 			continue;
6213 
6214 		sec_cnt = (value &
6215 				gc_ras_fields_vg20[i].sec_count_mask) >>
6216 				gc_ras_fields_vg20[i].sec_count_shift;
6217 		if (sec_cnt) {
6218 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6219 				gc_ras_fields_vg20[i].name,
6220 				se_id, inst_id,
6221 				sec_cnt);
6222 			*sec_count += sec_cnt;
6223 		}
6224 
6225 		ded_cnt = (value &
6226 				gc_ras_fields_vg20[i].ded_count_mask) >>
6227 				gc_ras_fields_vg20[i].ded_count_shift;
6228 		if (ded_cnt) {
6229 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6230 				gc_ras_fields_vg20[i].name,
6231 				se_id, inst_id,
6232 				ded_cnt);
6233 			*ded_count += ded_cnt;
6234 		}
6235 	}
6236 
6237 	return 0;
6238 }
6239 
6240 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6241 					  void *ras_error_status)
6242 {
6243 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6244 	uint32_t sec_count = 0, ded_count = 0;
6245 	uint32_t i, j, k;
6246 	uint32_t reg_value;
6247 
6248 	if (adev->asic_type != CHIP_VEGA20)
6249 		return -EINVAL;
6250 
6251 	err_data->ue_count = 0;
6252 	err_data->ce_count = 0;
6253 
6254 	mutex_lock(&adev->grbm_idx_mutex);
6255 
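	/* Read every SEC/DED counter register across all SEs and instances. */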
6256 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6257 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6258 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6259 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6260 				reg_value =
6261 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6262 				if (reg_value)
6263 					__get_ras_error_count(&sec_ded_counter_registers[i],
6264 							j, k, reg_value,
6265 							&sec_count, &ded_count);
6266 			}
6267 		}
6268 	}
6269 
6270 	err_data->ce_count += sec_count;
6271 	err_data->ue_count += ded_count;
6272 
6273 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6274 	mutex_unlock(&adev->grbm_idx_mutex);
6275 
6276 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6277 
6278 	return 0;
6279 }
6280 
6281 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6282 	.name = "gfx_v9_0",
6283 	.early_init = gfx_v9_0_early_init,
6284 	.late_init = gfx_v9_0_late_init,
6285 	.sw_init = gfx_v9_0_sw_init,
6286 	.sw_fini = gfx_v9_0_sw_fini,
6287 	.hw_init = gfx_v9_0_hw_init,
6288 	.hw_fini = gfx_v9_0_hw_fini,
6289 	.suspend = gfx_v9_0_suspend,
6290 	.resume = gfx_v9_0_resume,
6291 	.is_idle = gfx_v9_0_is_idle,
6292 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6293 	.soft_reset = gfx_v9_0_soft_reset,
6294 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6295 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6296 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6297 };
6298 
6299 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6300 	.type = AMDGPU_RING_TYPE_GFX,
6301 	.align_mask = 0xff,
6302 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6303 	.support_64bit_ptrs = true,
6304 	.vmhub = AMDGPU_GFXHUB_0,
6305 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6306 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6307 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6308 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6309 		5 +  /* COND_EXEC */
6310 		7 +  /* PIPELINE_SYNC */
6311 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6312 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6313 		2 + /* VM_FLUSH */
6314 		8 +  /* FENCE for VM_FLUSH */
6315 		20 + /* GDS switch */
6316 		4 + /* double SWITCH_BUFFER,
6317 		       the first COND_EXEC jumps to the place just
6318 		       prior to this double SWITCH_BUFFER */
6319 		5 + /* COND_EXEC */
6320 		7 +	 /*	HDP_flush */
6321 		4 +	 /*	VGT_flush */
6322 		14 + /*	CE_META */
6323 		31 + /*	DE_META */
6324 		3 + /* CNTX_CTRL */
6325 		5 + /* HDP_INVL */
6326 		8 + 8 + /* FENCE x2 */
6327 		2, /* SWITCH_BUFFER */
6328 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6329 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6330 	.emit_fence = gfx_v9_0_ring_emit_fence,
6331 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6332 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6333 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6334 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6335 	.test_ring = gfx_v9_0_ring_test_ring,
6336 	.test_ib = gfx_v9_0_ring_test_ib,
6337 	.insert_nop = amdgpu_ring_insert_nop,
6338 	.pad_ib = amdgpu_ring_generic_pad_ib,
6339 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6340 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6341 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6342 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6343 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6344 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6345 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6346 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6347 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6348 };
6349 
6350 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6351 	.type = AMDGPU_RING_TYPE_COMPUTE,
6352 	.align_mask = 0xff,
6353 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6354 	.support_64bit_ptrs = true,
6355 	.vmhub = AMDGPU_GFXHUB_0,
6356 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6357 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6358 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6359 	.emit_frame_size =
6360 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6361 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6362 		5 + /* hdp invalidate */
6363 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6364 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6365 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6366 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6367 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6368 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6369 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6370 	.emit_fence = gfx_v9_0_ring_emit_fence,
6371 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6372 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6373 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6374 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6375 	.test_ring = gfx_v9_0_ring_test_ring,
6376 	.test_ib = gfx_v9_0_ring_test_ib,
6377 	.insert_nop = amdgpu_ring_insert_nop,
6378 	.pad_ib = amdgpu_ring_generic_pad_ib,
6379 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6380 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6381 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6382 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6383 };
6384 
6385 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6386 	.type = AMDGPU_RING_TYPE_KIQ,
6387 	.align_mask = 0xff,
6388 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6389 	.support_64bit_ptrs = true,
6390 	.vmhub = AMDGPU_GFXHUB_0,
6391 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6392 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6393 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6394 	.emit_frame_size =
6395 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6396 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6397 		5 + /* hdp invalidate */
6398 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6399 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6400 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6401 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6402 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6403 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6404 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6405 	.test_ring = gfx_v9_0_ring_test_ring,
6406 	.insert_nop = amdgpu_ring_insert_nop,
6407 	.pad_ib = amdgpu_ring_generic_pad_ib,
6408 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6409 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6410 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6411 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6412 };
6413 
6414 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6415 {
6416 	int i;
6417 
6418 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6419 
6420 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6421 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6422 
6423 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6424 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6425 }
6426 
6427 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6428 	.set = gfx_v9_0_set_eop_interrupt_state,
6429 	.process = gfx_v9_0_eop_irq,
6430 };
6431 
6432 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6433 	.set = gfx_v9_0_set_priv_reg_fault_state,
6434 	.process = gfx_v9_0_priv_reg_irq,
6435 };
6436 
6437 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6438 	.set = gfx_v9_0_set_priv_inst_fault_state,
6439 	.process = gfx_v9_0_priv_inst_irq,
6440 };
6441 
6442 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6443 	.set = gfx_v9_0_set_cp_ecc_error_state,
6444 	.process = amdgpu_gfx_cp_ecc_error_irq,
6445 };
6446 
6447 
6448 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6449 {
6450 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6451 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6452 
6453 	adev->gfx.priv_reg_irq.num_types = 1;
6454 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6455 
6456 	adev->gfx.priv_inst_irq.num_types = 1;
6457 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6458 
6459 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6460 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6461 }
6462 
6463 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6464 {
6465 	switch (adev->asic_type) {
6466 	case CHIP_VEGA10:
6467 	case CHIP_VEGA12:
6468 	case CHIP_VEGA20:
6469 	case CHIP_RAVEN:
6470 	case CHIP_ARCTURUS:
6471 	case CHIP_RENOIR:
6472 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6473 		break;
6474 	default:
6475 		break;
6476 	}
6477 }
6478 
6479 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6480 {
6481 	/* init asic gds info */
6482 	switch (adev->asic_type) {
6483 	case CHIP_VEGA10:
6484 	case CHIP_VEGA12:
6485 	case CHIP_VEGA20:
6486 		adev->gds.gds_size = 0x10000;
6487 		break;
6488 	case CHIP_RAVEN:
6489 	case CHIP_ARCTURUS:
6490 		adev->gds.gds_size = 0x1000;
6491 		break;
6492 	default:
6493 		adev->gds.gds_size = 0x10000;
6494 		break;
6495 	}
6496 
6497 	switch (adev->asic_type) {
6498 	case CHIP_VEGA10:
6499 	case CHIP_VEGA20:
6500 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6501 		break;
6502 	case CHIP_VEGA12:
6503 		adev->gds.gds_compute_max_wave_id = 0x27f;
6504 		break;
6505 	case CHIP_RAVEN:
6506 		if (adev->rev_id >= 0x8)
6507 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6508 		else
6509 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6510 		break;
6511 	case CHIP_ARCTURUS:
6512 		adev->gds.gds_compute_max_wave_id = 0xfff;
6513 		break;
6514 	default:
6515 		/* this really depends on the chip */
6516 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6517 		break;
6518 	}
6519 
6520 	adev->gds.gws_size = 64;
6521 	adev->gds.oa_size = 16;
6522 }
6523 
6524 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6525 						 u32 bitmap)
6526 {
6527 	u32 data;
6528 
6529 	if (!bitmap)
6530 		return;
6531 
6532 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6533 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6534 
6535 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6536 }
6537 
6538 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6539 {
6540 	u32 data, mask;
6541 
6542 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6543 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6544 
6545 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6546 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6547 
6548 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6549 
6550 	return (~data) & mask;
6551 }
6552 
6553 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6554 				 struct amdgpu_cu_info *cu_info)
6555 {
6556 	int i, j, k, counter, active_cu_number = 0;
6557 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6558 	unsigned disable_masks[4 * 4];
6559 
6560 	if (!adev || !cu_info)
6561 		return -EINVAL;
6562 
6563 	/*
6564 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6565 	 */
6566 	if (adev->gfx.config.max_shader_engines *
6567 		adev->gfx.config.max_sh_per_se > 16)
6568 		return -EINVAL;
6569 
6570 	amdgpu_gfx_parse_disable_cu(disable_masks,
6571 				    adev->gfx.config.max_shader_engines,
6572 				    adev->gfx.config.max_sh_per_se);
6573 
6574 	mutex_lock(&adev->grbm_idx_mutex);
6575 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6576 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6577 			mask = 1;
6578 			ao_bitmap = 0;
6579 			counter = 0;
6580 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6581 			gfx_v9_0_set_user_cu_inactive_bitmap(
6582 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6583 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6584 
6585 			/*
6586 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6587 			 * a 4x4 array, which suits Vega ASICs with their
6588 			 * 4*2 SE/SH layout.
6589 			 * But for Arcturus, the SE/SH layout is changed to 8*1.
6590 			 * To minimize the impact, we make it compatible
6591 			 * with the current bitmap array as below:
6592 			 *    SE4,SH0 --> bitmap[0][1]
6593 			 *    SE5,SH0 --> bitmap[1][1]
6594 			 *    SE6,SH0 --> bitmap[2][1]
6595 			 *    SE7,SH0 --> bitmap[3][1]
6596 			 */
6597 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6598 
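			/* Count the active CUs in this SH and build its always-on CU bitmap. */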
6599 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6600 				if (bitmap & mask) {
6601 					if (counter < adev->gfx.config.max_cu_per_sh)
6602 						ao_bitmap |= mask;
6603 					counter++;
6604 				}
6605 				mask <<= 1;
6606 			}
6607 			active_cu_number += counter;
6608 			if (i < 2 && j < 2)
6609 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6610 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6611 		}
6612 	}
6613 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6614 	mutex_unlock(&adev->grbm_idx_mutex);
6615 
6616 	cu_info->number = active_cu_number;
6617 	cu_info->ao_cu_mask = ao_cu_mask;
6618 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6619 
6620 	return 0;
6621 }
6622 
6623 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6624 {
6625 	.type = AMD_IP_BLOCK_TYPE_GFX,
6626 	.major = 9,
6627 	.minor = 0,
6628 	.rev = 0,
6629 	.funcs = &gfx_v9_0_ip_funcs,
6630 };
6631