xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision d3a57388)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
/*
 * Driver-local constants defined in this file.
 *
 * NOTE(review): the purpose of the first four values is inferred from their
 * names only (a ring count, an HPD size, and two RLC-related start
 * addresses/offsets) -- confirm against the code that uses them, which is
 * outside this section.
 *
 * mmGCEA_PROBE_MAP / mmGCEA_PROBE_MAP_BASE_IDX are a register offset and its
 * base index; golden_settings_gc_9_1_rn further down passes
 * mmGCEA_PROBE_MAP to SOC15_REG_GOLDEN_VALUE().
 */
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
/*
 * Firmware file names declared through MODULE_FIRMWARE(), grouped by
 * file-name prefix: vega10, vega12, vega20, raven, picasso, raven2,
 * arcturus, renoir, green_sardine and aldebaran.
 *
 * The groups are not uniform:
 *   - arcturus lists only mec and rlc;
 *   - renoir lists ce/pfp/me/mec/rlc but no mec2;
 *   - aldebaran lists mec/mec2/rlc plus the sjt_mec/sjt_mec2 variants;
 *   - picasso and raven2 each add one extra rlc image
 *     (picasso_rlc_am4, raven_kicker_rlc).
 *
 * NOTE(review): the ce/pfp/me/mec/mec2/rlc suffixes presumably name the
 * microengine each image is loaded into -- confirm against the firmware
 * loading code, which is outside this section.
 */
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
131 
/*
 * Register offsets (and their *_BASE_IDX companions) defined in this file
 * for TCP_CHAN_STEER_0..5 with an _ARCT suffix and for
 * GOLDEN_TSC_COUNT_UPPER/LOWER with a _Renoir suffix.
 *
 * The _ARCT registers all use base index 0; the _Renoir registers use
 * base index 1.
 *
 * NOTE(review): the suffixes suggest these are the Arcturus- and
 * Renoir-specific locations of those registers; their users are outside
 * this section -- confirm.
 */
132 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
133 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
135 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
137 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
139 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
141 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
143 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
144 
145 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
146 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
147 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
148 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
149 
/*
 * GFX RAS sub-block identifiers.
 *
 * Enumerators are numbered consecutively starting at 0.  Each hardware
 * group is bracketed by *_INDEX_START / *_INDEX_END aliases: the group's
 * first member is assigned from START and END is assigned from the group's
 * last member, so the aliases never consume a value of their own.  SQC, TCC
 * and EA are additionally split into numbered sub-ranges (INDEX0, INDEX1,
 * ...), each with its own START/END pair.  GFX_SPI_SR_MEM and
 * GFX_TCI_WRITE_RAM are single entries with no START/END pair.
 *
 * TA_RAS_BLOCK__GFX_MAX is the final enumerator and therefore equals the
 * number of distinct sub-block values.
 *
 * Every value here depends on declaration order: inserting, removing or
 * reordering an entry renumbers everything after it.
 *
 * AMDGPU_RAS_SUB_BLOCK() stores these values in the ta_subblock field of
 * struct ras_gfx_subblock.
 */
150 enum ta_ras_gfx_subblock {
151 	/*CPC*/
152 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
153 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPC_UCODE,
155 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
157 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
158 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
159 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
160 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
161 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
162 	/* CPF*/
163 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
164 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
165 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
166 	TA_RAS_BLOCK__GFX_CPF_TAG,
167 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
168 	/* CPG*/
169 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
170 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
171 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
172 	TA_RAS_BLOCK__GFX_CPG_TAG,
173 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
174 	/* GDS*/
175 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
176 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
177 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
180 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
181 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
182 	/* SPI*/
183 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
184 	/* SQ*/
185 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
186 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
187 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
188 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
189 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
190 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
191 	/* SQC (3 ranges)*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	/* SQC range 0*/
194 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
195 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
196 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
204 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
205 	/* SQC range 1*/
206 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
208 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
218 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
219 	/* SQC range 2*/
220 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
222 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
224 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
225 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
229 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
230 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
232 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
233 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
234 	/* TA*/
235 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
236 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
239 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
240 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
241 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
242 	/* TCA*/
243 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
244 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
245 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
246 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
247 	/* TCC (5 sub-ranges)*/
248 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	/* TCC range 0*/
250 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
255 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
257 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
258 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
259 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
260 	/* TCC range 1*/
261 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
262 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
263 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
265 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
266 	/* TCC range 2*/
267 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
268 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
269 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
270 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
271 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
272 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
273 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
275 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
277 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
278 	/* TCC range 3*/
279 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
280 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
281 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
283 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
284 	/* TCC range 4*/
285 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
287 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
288 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
290 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
291 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
292 	/* TCI*/
293 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
294 	/* TCP*/
295 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
296 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
297 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
298 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
299 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
300 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
301 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
302 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
303 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
304 	/* TD*/
305 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
306 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
307 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
308 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
309 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
310 	/* EA (3 sub-ranges)*/
311 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	/* EA range 0*/
313 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
314 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
315 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
316 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
317 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
318 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
320 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
322 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
323 	/* EA range 1*/
324 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
325 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
326 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
327 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
328 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
329 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
330 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
331 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
332 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
333 	/* EA range 2*/
334 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
337 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
338 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
339 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
340 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
341 	/* UTC VM L2 bank*/
342 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
343 	/* UTC VM walker*/
344 	TA_RAS_BLOCK__UTC_VML2_WALKER,
345 	/* UTC ATC L2 2MB cache*/
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
347 	/* UTC ATC L2 4KB cache*/
348 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
349 	TA_RAS_BLOCK__GFX_MAX
350 };
351 
/*
 * One row of the ras_gfx_subblocks[] table.  Rows are filled positionally
 * by AMDGPU_RAS_SUB_BLOCK(), so the field order here must not change:
 *
 *   name                    - the sub-block token turned into a string
 *   ta_subblock             - the matching TA_RAS_BLOCK__<subblock> value
 *   hw_supported_error_type - bit mask packed from macro arguments a..d
 *   sw_supported_error_type - bit mask packed from macro arguments e..h
 *
 * NOTE(review): name is only ever initialised from a string literal in
 * this section, so "const char *" would describe it more accurately than
 * "unsigned char *"; changing the type would affect users outside this
 * section -- check them first.
 * NOTE(review): what each bit of the two masks means is not defined in
 * this section -- confirm against the RAS error-type definitions.
 */
352 struct ras_gfx_subblock {
353 	unsigned char *name;
354 	int ta_subblock;
355 	int hw_supported_error_type;
356 	int sw_supported_error_type;
357 };
358 
/*
 * AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)
 *
 * Expands to one designated array element, indexed by
 * AMDGPU_RAS_BLOCK__<subblock>, holding a positionally initialised
 * struct ras_gfx_subblock:
 *
 *   name                    = "<subblock>" (stringified token)
 *   ta_subblock             = TA_RAS_BLOCK__<subblock>
 *   hw_supported_error_type = a -> bit 0, b -> bit 1, c -> bit 2, d -> bit 3
 *   sw_supported_error_type = g -> bit 0, e -> bit 1, h -> bit 2, f -> bit 3
 *
 * The sw mask deliberately or accidentally does NOT follow argument order
 * (e, f, g, h map to bits 1, 3, 0, 2); keep that in mind when reading the
 * table rows.
 * NOTE(review): the meaning of the individual bits is not defined in this
 * section -- confirm before changing any flag.
 */
359 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
360 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
361 		#subblock,                                                     \
362 		TA_RAS_BLOCK__##subblock,                                      \
363 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
364 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
365 	}
366 
/*
 * Per-sub-block RAS description table.
 *
 * Each row is produced by AMDGPU_RAS_SUB_BLOCK(), which places it at array
 * index AMDGPU_RAS_BLOCK__<subblock> (that enum is declared outside this
 * section) and records the corresponding TA_RAS_BLOCK__<subblock> value in
 * the row.  The array is therefore indexed by the AMDGPU_RAS_BLOCK__ value,
 * not by the TA_RAS_BLOCK__ value.
 *
 * The eight numeric arguments of every row are 0/1 flags; the macro
 * definition shows how they are packed into the hw and sw masks.
 */
367 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
368 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
396 			     0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
400 			     0, 0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
404 			     0, 0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
406 			     0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
408 			     1),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
410 			     0, 0, 0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
422 			     0, 0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
428 			     0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
440 			     0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
456 			     1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
473 			     0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
476 			     0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
478 			     0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
480 			     0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
515 };
516 
/*
 * Register settings table "gc_9_0": one SOC15_REG_GOLDEN_VALUE() entry per
 * GC register, all for instance 0.
 *
 * NOTE(review): the last two arguments of each entry are presumably an AND
 * mask selecting the bits to change and the value to apply under that
 * mask -- confirm against the SOC15_REG_GOLDEN_VALUE definition, which is
 * outside this section.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
517 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
518 {
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
539 };
540 
/*
 * Register settings table "gc_9_0_vg10": SOC15_REG_GOLDEN_VALUE() entries
 * for GC instance 0.
 *
 * NOTE(review): the "vg10" suffix presumably means this table is the
 * Vega10-specific companion of golden_settings_gc_9_0; where and in what
 * order the tables are applied is outside this section -- confirm.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
541 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
542 {
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
561 };
562 
/*
 * Register settings table "gc_9_0_vg20": SOC15_REG_GOLDEN_VALUE() entries
 * for GC instance 0.
 *
 * NOTE(review): the "vg20" suffix presumably means Vega20-specific
 * settings; the code that selects and applies this table is outside this
 * section -- confirm.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
563 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
564 {
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
576 };
577 
/*
 * Register settings table "gc_9_1": SOC15_REG_GOLDEN_VALUE() entries for
 * GC instance 0.
 *
 * NOTE(review): presumably the base table for GC 9.1 parts, with the
 * _rv1/_rv2 tables as per-variant companions; the code that selects and
 * applies the tables is outside this section -- confirm.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
578 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
579 {
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
604 };
605 
/*
 * Register settings table "gc_9_1_rv1": SOC15_REG_GOLDEN_VALUE() entries
 * for GC instance 0.
 *
 * NOTE(review): the "rv1" suffix presumably means the first Raven variant;
 * the code that selects and applies this table is outside this section --
 * confirm.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
606 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
607 {
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
615 };
616 
/*
 * Register settings table "gc_9_1_rv2": SOC15_REG_GOLDEN_VALUE() entries
 * for GC instance 0.
 *
 * NOTE(review): the "rv2" suffix presumably means the Raven2 variant; the
 * code that selects and applies this table is outside this section --
 * confirm.
 * The values are hardware-specific constants; do not edit them without a
 * register reference.
 */
617 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
618 {
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
638 };
639 
/*
 * Golden register settings applied by gfx_v9_0_init_golden_registers() for
 * GC IP version 9.3.0 (the caller's comment refers to this part as renoir).
 * For this version the caller returns right after programming this table,
 * so golden_settings_gc_9_x_common is not applied on top of it.
 */
640 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
654 };
655 
/*
 * Golden register settings shared across GC 9.x parts.
 * gfx_v9_0_init_golden_registers() programs this table last, for every IP
 * version except 9.4.1 and 9.4.2; version 9.3.0 also skips it because that
 * case returns early.
 */
656 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
657 {
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
661 };
662 
/*
 * Golden register settings applied by gfx_v9_0_init_golden_registers() for
 * GC IP version 9.2.1; golden_settings_gc_9_2_1_vg12 is programmed right
 * after this table.
 */
663 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
664 {
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
681 };
682 
/*
 * Second golden register table for GC IP version 9.2.1, programmed by
 * gfx_v9_0_init_golden_registers() after golden_settings_gc_9_2_1.
 * The GB_ADDR_CONFIG value here equals VEGA12_GB_ADDR_CONFIG_GOLDEN.
 */
683 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
684 {
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
698 };
699 
/*
 * Golden register settings applied by gfx_v9_0_init_golden_registers() for
 * GC IP version 9.4.1.  This is the only table programmed for that version:
 * golden_settings_gc_9_x_common is explicitly skipped for 9.4.1.
 */
700 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
701 {
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
713 };
714 
/*
 * List of GC registers (GRBM_GFX_INDEX and SQ_IND_INDEX).
 * NOTE(review): judging by the name this is the set of registers that must
 * be accessed through the RLCG path -- confirm against the code that
 * consumes this table, which is outside this part of the file.
 */
715 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
716 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
717 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
718 };
719 
/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_ADDR_n registers (n = 0..7)
 * relative to RLC_SRM_INDEX_CNTL_ADDR_0; entry 0 is therefore always 0.
 */
720 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
721 {
722 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 };
731 
/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_DATA_n registers (n = 0..7)
 * relative to RLC_SRM_INDEX_CNTL_DATA_0; entry 0 is therefore always 0.
 */
732 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
733 {
734 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 };
743 
/*
 * Per-ASIC golden values for the GB_ADDR_CONFIG register.  The VEGA12,
 * RAVEN and RAVEN2 values match the mmGB_ADDR_CONFIG entries of the
 * golden_settings_gc_9_2_1_vg12, golden_settings_gc_9_1_rv1 and
 * golden_settings_gc_9_1_rv2 tables respectively.
 */
744 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
745 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
746 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
747 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
748 
/*
 * Prototypes for file-local (static) functions, declared here so they can
 * be referenced before their definitions.
 */
749 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
750 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
751 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
752 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
753 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
754 				struct amdgpu_cu_info *cu_info);
755 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
756 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
757 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
758 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
759 					  void *ras_error_status);
760 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
761 				     void *inject_if);
762 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
763 
764 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
765 				uint64_t queue_mask)
766 {
767 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
768 	amdgpu_ring_write(kiq_ring,
769 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
770 		/* vmid_mask:0* queue_type:0 (KIQ) */
771 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
772 	amdgpu_ring_write(kiq_ring,
773 			lower_32_bits(queue_mask));	/* queue mask lo */
774 	amdgpu_ring_write(kiq_ring,
775 			upper_32_bits(queue_mask));	/* queue mask hi */
776 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
777 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
778 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
779 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
780 }
781 
782 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
783 				 struct amdgpu_ring *ring)
784 {
785 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
786 	uint64_t wptr_addr = ring->wptr_gpu_addr;
787 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
788 
789 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
790 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
791 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
792 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
793 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
794 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
795 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
796 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
797 			 /*queue_type: normal compute queue */
798 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
799 			 /* alloc format: all_on_one_pipe */
800 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
801 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
802 			 /* num_queues: must be 1 */
803 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
804 	amdgpu_ring_write(kiq_ring,
805 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
806 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
807 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
808 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
809 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
810 }
811 
812 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
813 				   struct amdgpu_ring *ring,
814 				   enum amdgpu_unmap_queues_action action,
815 				   u64 gpu_addr, u64 seq)
816 {
817 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
818 
819 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
820 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
821 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
822 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
823 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
824 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
825 	amdgpu_ring_write(kiq_ring,
826 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
827 
828 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
829 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
830 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
831 		amdgpu_ring_write(kiq_ring, seq);
832 	} else {
833 		amdgpu_ring_write(kiq_ring, 0);
834 		amdgpu_ring_write(kiq_ring, 0);
835 		amdgpu_ring_write(kiq_ring, 0);
836 	}
837 }
838 
839 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
840 				   struct amdgpu_ring *ring,
841 				   u64 addr,
842 				   u64 seq)
843 {
844 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
845 
846 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
847 	amdgpu_ring_write(kiq_ring,
848 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
849 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
850 			  PACKET3_QUERY_STATUS_COMMAND(2));
851 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
852 	amdgpu_ring_write(kiq_ring,
853 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
854 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
855 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
856 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
857 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
858 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
859 }
860 
861 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
862 				uint16_t pasid, uint32_t flush_type,
863 				bool all_hub)
864 {
865 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
866 	amdgpu_ring_write(kiq_ring,
867 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
868 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
869 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
870 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
871 }
872 
/*
 * KIQ PM4 packet emitters for gfx v9.
 * Each *_size field equals the number of dwords the matching emitter above
 * writes to the KIQ ring (packet header included): 8 for set_resources,
 * 7 for map_queues, 6 for unmap_queues, 7 for query_status and 2 for
 * invalidate_tlbs.
 */
873 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
874 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
875 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
876 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
877 	.kiq_query_status = gfx_v9_0_kiq_query_status,
878 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
879 	.set_resources_size = 8,
880 	.map_queues_size = 7,
881 	.unmap_queues_size = 6,
882 	.query_status_size = 7,
883 	.invalidate_tlbs_size = 2,
884 };
885 
/* Install the gfx v9 KIQ PM4 packet emitter table on the device's KIQ. */
886 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
887 {
888 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
889 }
890 
891 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
892 {
893 	switch (adev->ip_versions[GC_HWIP][0]) {
894 	case IP_VERSION(9, 0, 1):
895 		soc15_program_register_sequence(adev,
896 						golden_settings_gc_9_0,
897 						ARRAY_SIZE(golden_settings_gc_9_0));
898 		soc15_program_register_sequence(adev,
899 						golden_settings_gc_9_0_vg10,
900 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
901 		break;
902 	case IP_VERSION(9, 2, 1):
903 		soc15_program_register_sequence(adev,
904 						golden_settings_gc_9_2_1,
905 						ARRAY_SIZE(golden_settings_gc_9_2_1));
906 		soc15_program_register_sequence(adev,
907 						golden_settings_gc_9_2_1_vg12,
908 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
909 		break;
910 	case IP_VERSION(9, 4, 0):
911 		soc15_program_register_sequence(adev,
912 						golden_settings_gc_9_0,
913 						ARRAY_SIZE(golden_settings_gc_9_0));
914 		soc15_program_register_sequence(adev,
915 						golden_settings_gc_9_0_vg20,
916 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
917 		break;
918 	case IP_VERSION(9, 4, 1):
919 		soc15_program_register_sequence(adev,
920 						golden_settings_gc_9_4_1_arct,
921 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
922 		break;
923 	case IP_VERSION(9, 2, 2):
924 	case IP_VERSION(9, 1, 0):
925 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
926 						ARRAY_SIZE(golden_settings_gc_9_1));
927 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
928 			soc15_program_register_sequence(adev,
929 							golden_settings_gc_9_1_rv2,
930 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
931 		else
932 			soc15_program_register_sequence(adev,
933 							golden_settings_gc_9_1_rv1,
934 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
935 		break;
936 	 case IP_VERSION(9, 3, 0):
937 		soc15_program_register_sequence(adev,
938 						golden_settings_gc_9_1_rn,
939 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
940 		return; /* for renoir, don't need common goldensetting */
941 	case IP_VERSION(9, 4, 2):
942 		gfx_v9_4_2_init_golden_registers(adev,
943 						 adev->smuio.funcs->get_die_id(adev));
944 		break;
945 	default:
946 		break;
947 	}
948 
949 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
950 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
951 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
952 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
953 }
954 
955 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
956 				       bool wc, uint32_t reg, uint32_t val)
957 {
958 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
959 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
960 				WRITE_DATA_DST_SEL(0) |
961 				(wc ? WR_CONFIRM : 0));
962 	amdgpu_ring_write(ring, reg);
963 	amdgpu_ring_write(ring, 0);
964 	amdgpu_ring_write(ring, val);
965 }
966 
967 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
968 				  int mem_space, int opt, uint32_t addr0,
969 				  uint32_t addr1, uint32_t ref, uint32_t mask,
970 				  uint32_t inv)
971 {
972 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
973 	amdgpu_ring_write(ring,
974 				 /* memory (1) or register (0) */
975 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
976 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
977 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
978 				 WAIT_REG_MEM_ENGINE(eng_sel)));
979 
980 	if (mem_space)
981 		BUG_ON(addr0 & 0x3); /* Dword align */
982 	amdgpu_ring_write(ring, addr0);
983 	amdgpu_ring_write(ring, addr1);
984 	amdgpu_ring_write(ring, ref);
985 	amdgpu_ring_write(ring, mask);
986 	amdgpu_ring_write(ring, inv); /* poll interval */
987 }
988 
989 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
990 {
991 	struct amdgpu_device *adev = ring->adev;
992 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
993 	uint32_t tmp = 0;
994 	unsigned i;
995 	int r;
996 
997 	WREG32(scratch, 0xCAFEDEAD);
998 	r = amdgpu_ring_alloc(ring, 3);
999 	if (r)
1000 		return r;
1001 
1002 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1003 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1004 	amdgpu_ring_write(ring, 0xDEADBEEF);
1005 	amdgpu_ring_commit(ring);
1006 
1007 	for (i = 0; i < adev->usec_timeout; i++) {
1008 		tmp = RREG32(scratch);
1009 		if (tmp == 0xDEADBEEF)
1010 			break;
1011 		udelay(1);
1012 	}
1013 
1014 	if (i >= adev->usec_timeout)
1015 		r = -ETIMEDOUT;
1016 	return r;
1017 }
1018 
1019 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1020 {
1021 	struct amdgpu_device *adev = ring->adev;
1022 	struct amdgpu_ib ib;
1023 	struct dma_fence *f = NULL;
1024 
1025 	unsigned index;
1026 	uint64_t gpu_addr;
1027 	uint32_t tmp;
1028 	long r;
1029 
1030 	r = amdgpu_device_wb_get(adev, &index);
1031 	if (r)
1032 		return r;
1033 
1034 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1035 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1036 	memset(&ib, 0, sizeof(ib));
1037 	r = amdgpu_ib_get(adev, NULL, 16,
1038 					AMDGPU_IB_POOL_DIRECT, &ib);
1039 	if (r)
1040 		goto err1;
1041 
1042 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1043 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1044 	ib.ptr[2] = lower_32_bits(gpu_addr);
1045 	ib.ptr[3] = upper_32_bits(gpu_addr);
1046 	ib.ptr[4] = 0xDEADBEEF;
1047 	ib.length_dw = 5;
1048 
1049 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1050 	if (r)
1051 		goto err2;
1052 
1053 	r = dma_fence_wait_timeout(f, false, timeout);
1054 	if (r == 0) {
1055 		r = -ETIMEDOUT;
1056 		goto err2;
1057 	} else if (r < 0) {
1058 		goto err2;
1059 	}
1060 
1061 	tmp = adev->wb.wb[index];
1062 	if (tmp == 0xDEADBEEF)
1063 		r = 0;
1064 	else
1065 		r = -EINVAL;
1066 
1067 err2:
1068 	amdgpu_ib_free(adev, &ib, NULL);
1069 	dma_fence_put(f);
1070 err1:
1071 	amdgpu_device_wb_free(adev, index);
1072 	return r;
1073 }
1074 
1075 
1076 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1077 {
1078 	release_firmware(adev->gfx.pfp_fw);
1079 	adev->gfx.pfp_fw = NULL;
1080 	release_firmware(adev->gfx.me_fw);
1081 	adev->gfx.me_fw = NULL;
1082 	release_firmware(adev->gfx.ce_fw);
1083 	adev->gfx.ce_fw = NULL;
1084 	release_firmware(adev->gfx.rlc_fw);
1085 	adev->gfx.rlc_fw = NULL;
1086 	release_firmware(adev->gfx.mec_fw);
1087 	adev->gfx.mec_fw = NULL;
1088 	release_firmware(adev->gfx.mec2_fw);
1089 	adev->gfx.mec2_fw = NULL;
1090 
1091 	kfree(adev->gfx.rlc.register_list_format);
1092 }
1093 
1094 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1095 {
1096 	adev->gfx.me_fw_write_wait = false;
1097 	adev->gfx.mec_fw_write_wait = false;
1098 
1099 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1100 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1101 	    (adev->gfx.mec_feature_version < 46) ||
1102 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1103 	    (adev->gfx.pfp_feature_version < 46)))
1104 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1105 
1106 	switch (adev->ip_versions[GC_HWIP][0]) {
1107 	case IP_VERSION(9, 0, 1):
1108 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1109 		    (adev->gfx.me_feature_version >= 42) &&
1110 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1111 		    (adev->gfx.pfp_feature_version >= 42))
1112 			adev->gfx.me_fw_write_wait = true;
1113 
1114 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1115 		    (adev->gfx.mec_feature_version >= 42))
1116 			adev->gfx.mec_fw_write_wait = true;
1117 		break;
1118 	case IP_VERSION(9, 2, 1):
1119 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1120 		    (adev->gfx.me_feature_version >= 44) &&
1121 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1122 		    (adev->gfx.pfp_feature_version >= 44))
1123 			adev->gfx.me_fw_write_wait = true;
1124 
1125 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1126 		    (adev->gfx.mec_feature_version >= 44))
1127 			adev->gfx.mec_fw_write_wait = true;
1128 		break;
1129 	case IP_VERSION(9, 4, 0):
1130 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1131 		    (adev->gfx.me_feature_version >= 44) &&
1132 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1133 		    (adev->gfx.pfp_feature_version >= 44))
1134 			adev->gfx.me_fw_write_wait = true;
1135 
1136 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1137 		    (adev->gfx.mec_feature_version >= 44))
1138 			adev->gfx.mec_fw_write_wait = true;
1139 		break;
1140 	case IP_VERSION(9, 1, 0):
1141 	case IP_VERSION(9, 2, 2):
1142 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1143 		    (adev->gfx.me_feature_version >= 42) &&
1144 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1145 		    (adev->gfx.pfp_feature_version >= 42))
1146 			adev->gfx.me_fw_write_wait = true;
1147 
1148 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1149 		    (adev->gfx.mec_feature_version >= 42))
1150 			adev->gfx.mec_fw_write_wait = true;
1151 		break;
1152 	default:
1153 		adev->gfx.me_fw_write_wait = true;
1154 		adev->gfx.mec_fw_write_wait = true;
1155 		break;
1156 	}
1157 }
1158 
/*
 * One GFXOFF quirk entry.  gfx_v9_0_should_disable_gfxoff() compares every
 * field against the corresponding field of the PCI device; all five must
 * match.
 */
1159 struct amdgpu_gfxoff_quirk {
1160 	u16 chip_vendor;	/* compared with pdev->vendor */
1161 	u16 chip_device;	/* compared with pdev->device; 0 ends the list */
1162 	u16 subsys_vendor;	/* compared with pdev->subsystem_vendor */
1163 	u16 subsys_device;	/* compared with pdev->subsystem_device */
1164 	u8 revision;		/* compared with pdev->revision */
1165 };
1166 
/*
 * Devices on which GFXOFF gets disabled (see
 * gfx_v9_0_check_if_need_gfxoff()).  The list is terminated by an all-zero
 * entry; the lookup stops at the first entry whose chip_device is 0.
 */
1167 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1168 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1169 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1170 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1171 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1172 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1173 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1174 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1175 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1176 	{ 0, 0, 0, 0, 0 },
1177 };
1178 
1179 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1180 {
1181 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1182 
1183 	while (p && p->chip_device != 0) {
1184 		if (pdev->vendor == p->chip_vendor &&
1185 		    pdev->device == p->chip_device &&
1186 		    pdev->subsystem_vendor == p->subsys_vendor &&
1187 		    pdev->subsystem_device == p->subsys_device &&
1188 		    pdev->revision == p->revision) {
1189 			return true;
1190 		}
1191 		++p;
1192 	}
1193 	return false;
1194 }
1195 
1196 static bool is_raven_kicker(struct amdgpu_device *adev)
1197 {
1198 	if (adev->pm.fw_version >= 0x41e2b)
1199 		return true;
1200 	else
1201 		return false;
1202 }
1203 
1204 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1205 {
1206 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1207 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1208 	    (adev->gfx.me_feature_version >= 52))
1209 		return true;
1210 	else
1211 		return false;
1212 }
1213 
1214 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1215 {
1216 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1217 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1218 
1219 	switch (adev->ip_versions[GC_HWIP][0]) {
1220 	case IP_VERSION(9, 0, 1):
1221 	case IP_VERSION(9, 2, 1):
1222 	case IP_VERSION(9, 4, 0):
1223 		break;
1224 	case IP_VERSION(9, 2, 2):
1225 	case IP_VERSION(9, 1, 0):
1226 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1227 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1228 		    ((!is_raven_kicker(adev) &&
1229 		      adev->gfx.rlc_fw_version < 531) ||
1230 		     (adev->gfx.rlc_feature_version < 1) ||
1231 		     !adev->gfx.rlc.is_rlc_v2_1))
1232 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1233 
1234 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1235 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1236 				AMD_PG_SUPPORT_CP |
1237 				AMD_PG_SUPPORT_RLC_SMU_HS;
1238 		break;
1239 	case IP_VERSION(9, 3, 0):
1240 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1241 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1242 				AMD_PG_SUPPORT_CP |
1243 				AMD_PG_SUPPORT_RLC_SMU_HS;
1244 		break;
1245 	default:
1246 		break;
1247 	}
1248 }
1249 
1250 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1251 					  const char *chip_name)
1252 {
1253 	char fw_name[30];
1254 	int err;
1255 
1256 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1257 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1258 	if (err)
1259 		goto out;
1260 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1261 	if (err)
1262 		goto out;
1263 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1264 
1265 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1266 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1267 	if (err)
1268 		goto out;
1269 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1270 	if (err)
1271 		goto out;
1272 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1273 
1274 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1275 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1276 	if (err)
1277 		goto out;
1278 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1279 	if (err)
1280 		goto out;
1281 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1282 
1283 out:
1284 	if (err) {
1285 		dev_err(adev->dev,
1286 			"gfx9: Failed to init firmware \"%s\"\n",
1287 			fw_name);
1288 		release_firmware(adev->gfx.pfp_fw);
1289 		adev->gfx.pfp_fw = NULL;
1290 		release_firmware(adev->gfx.me_fw);
1291 		adev->gfx.me_fw = NULL;
1292 		release_firmware(adev->gfx.ce_fw);
1293 		adev->gfx.ce_fw = NULL;
1294 	}
1295 	return err;
1296 }
1297 
1298 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1299 					  const char *chip_name)
1300 {
1301 	char fw_name[30];
1302 	int err;
1303 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1304 	uint16_t version_major;
1305 	uint16_t version_minor;
1306 	uint32_t smu_version;
1307 
1308 	/*
1309 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1310 	 * instead of picasso_rlc.bin.
1311 	 * Judgment method:
1312 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1313 	 *          or revision >= 0xD8 && revision <= 0xDF
1314 	 * otherwise is PCO FP5
1315 	 */
1316 	if (!strcmp(chip_name, "picasso") &&
1317 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1318 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1319 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1320 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1321 		(smu_version >= 0x41e2b))
1322 		/**
1323 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1324 		*/
1325 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1326 	else
1327 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1328 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1329 	if (err)
1330 		goto out;
1331 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1332 	if (err)
1333 		goto out;
1334 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1335 
1336 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1337 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1338 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1339 out:
1340 	if (err) {
1341 		dev_err(adev->dev,
1342 			"gfx9: Failed to init firmware \"%s\"\n",
1343 			fw_name);
1344 		release_firmware(adev->gfx.rlc_fw);
1345 		adev->gfx.rlc_fw = NULL;
1346 	}
1347 	return err;
1348 }
1349 
1350 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1351 {
1352 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1353 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1354 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1355 		return false;
1356 
1357 	return true;
1358 }
1359 
1360 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1361 					  const char *chip_name)
1362 {
1363 	char fw_name[30];
1364 	int err;
1365 
1366 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1367 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1368 	else
1369 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1370 
1371 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1372 	if (err)
1373 		goto out;
1374 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1375 	if (err)
1376 		goto out;
1377 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1378 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1379 
1380 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1381 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1382 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1383 		else
1384 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1385 
1386 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1387 		if (!err) {
1388 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1389 			if (err)
1390 				goto out;
1391 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1392 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1393 		} else {
1394 			err = 0;
1395 			adev->gfx.mec2_fw = NULL;
1396 		}
1397 	} else {
1398 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1399 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1400 	}
1401 
1402 out:
1403 	gfx_v9_0_check_if_need_gfxoff(adev);
1404 	gfx_v9_0_check_fw_write_wait(adev);
1405 	if (err) {
1406 		dev_err(adev->dev,
1407 			"gfx9: Failed to init firmware \"%s\"\n",
1408 			fw_name);
1409 		release_firmware(adev->gfx.mec_fw);
1410 		adev->gfx.mec_fw = NULL;
1411 		release_firmware(adev->gfx.mec2_fw);
1412 		adev->gfx.mec2_fw = NULL;
1413 	}
1414 	return err;
1415 }
1416 
1417 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1418 {
1419 	const char *chip_name;
1420 	int r;
1421 
1422 	DRM_DEBUG("\n");
1423 
1424 	switch (adev->ip_versions[GC_HWIP][0]) {
1425 	case IP_VERSION(9, 0, 1):
1426 		chip_name = "vega10";
1427 		break;
1428 	case IP_VERSION(9, 2, 1):
1429 		chip_name = "vega12";
1430 		break;
1431 	case IP_VERSION(9, 4, 0):
1432 		chip_name = "vega20";
1433 		break;
1434 	case IP_VERSION(9, 2, 2):
1435 	case IP_VERSION(9, 1, 0):
1436 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1437 			chip_name = "raven2";
1438 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1439 			chip_name = "picasso";
1440 		else
1441 			chip_name = "raven";
1442 		break;
1443 	case IP_VERSION(9, 4, 1):
1444 		chip_name = "arcturus";
1445 		break;
1446 	case IP_VERSION(9, 3, 0):
1447 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1448 			chip_name = "renoir";
1449 		else
1450 			chip_name = "green_sardine";
1451 		break;
1452 	case IP_VERSION(9, 4, 2):
1453 		chip_name = "aldebaran";
1454 		break;
1455 	default:
1456 		BUG();
1457 	}
1458 
1459 	/* No CPG in Arcturus */
1460 	if (adev->gfx.num_gfx_rings) {
1461 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1462 		if (r)
1463 			return r;
1464 	}
1465 
1466 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1467 	if (r)
1468 		return r;
1469 
1470 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1471 	if (r)
1472 		return r;
1473 
1474 	return r;
1475 }
1476 
1477 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1478 {
1479 	u32 count = 0;
1480 	const struct cs_section_def *sect = NULL;
1481 	const struct cs_extent_def *ext = NULL;
1482 
1483 	/* begin clear state */
1484 	count += 2;
1485 	/* context control state */
1486 	count += 3;
1487 
1488 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1489 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1490 			if (sect->id == SECT_CONTEXT)
1491 				count += 2 + ext->reg_count;
1492 			else
1493 				return 0;
1494 		}
1495 	}
1496 
1497 	/* end clear state */
1498 	count += 2;
1499 	/* clear state */
1500 	count += 2;
1501 
1502 	return count;
1503 }
1504 
1505 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1506 				    volatile u32 *buffer)
1507 {
1508 	u32 count = 0, i;
1509 	const struct cs_section_def *sect = NULL;
1510 	const struct cs_extent_def *ext = NULL;
1511 
1512 	if (adev->gfx.rlc.cs_data == NULL)
1513 		return;
1514 	if (buffer == NULL)
1515 		return;
1516 
1517 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1518 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1519 
1520 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1521 	buffer[count++] = cpu_to_le32(0x80000000);
1522 	buffer[count++] = cpu_to_le32(0x80000000);
1523 
1524 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1525 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1526 			if (sect->id == SECT_CONTEXT) {
1527 				buffer[count++] =
1528 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1529 				buffer[count++] = cpu_to_le32(ext->reg_index -
1530 						PACKET3_SET_CONTEXT_REG_START);
1531 				for (i = 0; i < ext->reg_count; i++)
1532 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1533 			} else {
1534 				return;
1535 			}
1536 		}
1537 	}
1538 
1539 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1540 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1541 
1542 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1543 	buffer[count++] = cpu_to_le32(0);
1544 }
1545 
1546 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1547 {
1548 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1549 	uint32_t pg_always_on_cu_num = 2;
1550 	uint32_t always_on_cu_num;
1551 	uint32_t i, j, k;
1552 	uint32_t mask, cu_bitmap, counter;
1553 
1554 	if (adev->flags & AMD_IS_APU)
1555 		always_on_cu_num = 4;
1556 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1557 		always_on_cu_num = 8;
1558 	else
1559 		always_on_cu_num = 12;
1560 
1561 	mutex_lock(&adev->grbm_idx_mutex);
1562 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1563 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1564 			mask = 1;
1565 			cu_bitmap = 0;
1566 			counter = 0;
1567 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1568 
1569 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1570 				if (cu_info->bitmap[i][j] & mask) {
1571 					if (counter == pg_always_on_cu_num)
1572 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1573 					if (counter < always_on_cu_num)
1574 						cu_bitmap |= mask;
1575 					else
1576 						break;
1577 					counter++;
1578 				}
1579 				mask <<= 1;
1580 			}
1581 
1582 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1583 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1584 		}
1585 	}
1586 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1587 	mutex_unlock(&adev->grbm_idx_mutex);
1588 }
1589 
1590 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1591 {
1592 	uint32_t data;
1593 
1594 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1595 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1596 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1597 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1598 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1599 
1600 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1601 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1602 
1603 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1604 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1605 
1606 	mutex_lock(&adev->grbm_idx_mutex);
1607 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1608 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1609 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1610 
1611 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1612 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1613 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1614 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1615 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1616 
1617 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1618 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1619 	data &= 0x0000FFFF;
1620 	data |= 0x00C00000;
1621 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1622 
1623 	/*
1624 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1625 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1626 	 */
1627 
1628 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1629 	 * but used for RLC_LB_CNTL configuration */
1630 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1631 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1632 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1633 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1634 	mutex_unlock(&adev->grbm_idx_mutex);
1635 
1636 	gfx_v9_0_init_always_on_cu_mask(adev);
1637 }
1638 
1639 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1640 {
1641 	uint32_t data;
1642 
1643 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1644 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1645 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1646 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1647 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1648 
1649 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1650 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1651 
1652 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1653 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1654 
1655 	mutex_lock(&adev->grbm_idx_mutex);
1656 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1657 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1658 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1659 
1660 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1661 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1662 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1663 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1664 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1665 
1666 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1667 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1668 	data &= 0x0000FFFF;
1669 	data |= 0x00C00000;
1670 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1671 
1672 	/*
1673 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1674 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1675 	 */
1676 
1677 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1678 	 * but used for RLC_LB_CNTL configuration */
1679 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1680 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1681 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1682 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1683 	mutex_unlock(&adev->grbm_idx_mutex);
1684 
1685 	gfx_v9_0_init_always_on_cu_mask(adev);
1686 }
1687 
1688 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1689 {
1690 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1691 }
1692 
/*
 * Number of CP jump tables: 5 when gfx_v9_0_load_mec2_fw_bin_support()
 * reports support for a separate MEC2 firmware binary, 4 otherwise.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1700 
1701 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1702 {
1703 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1704 
1705 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1706 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1707 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1708 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1709 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1710 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1711 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1712 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1713 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1714 }
1715 
1716 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1717 {
1718 	const struct cs_section_def *cs_data;
1719 	int r;
1720 
1721 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1722 
1723 	cs_data = adev->gfx.rlc.cs_data;
1724 
1725 	if (cs_data) {
1726 		/* init clear state block */
1727 		r = amdgpu_gfx_rlc_init_csb(adev);
1728 		if (r)
1729 			return r;
1730 	}
1731 
1732 	if (adev->flags & AMD_IS_APU) {
1733 		/* TODO: double check the cp_table_size for RV */
1734 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1735 		r = amdgpu_gfx_rlc_init_cpt(adev);
1736 		if (r)
1737 			return r;
1738 	}
1739 
1740 	switch (adev->ip_versions[GC_HWIP][0]) {
1741 	case IP_VERSION(9, 2, 2):
1742 	case IP_VERSION(9, 1, 0):
1743 		gfx_v9_0_init_lbpw(adev);
1744 		break;
1745 	case IP_VERSION(9, 4, 0):
1746 		gfx_v9_4_init_lbpw(adev);
1747 		break;
1748 	default:
1749 		break;
1750 	}
1751 
1752 	/* init spm vmid with 0xf */
1753 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1754 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1755 
1756 	return 0;
1757 }
1758 
1759 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1760 {
1761 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1762 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1763 }
1764 
1765 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1766 {
1767 	int r;
1768 	u32 *hpd;
1769 	const __le32 *fw_data;
1770 	unsigned fw_size;
1771 	u32 *fw;
1772 	size_t mec_hpd_size;
1773 
1774 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1775 
1776 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1777 
1778 	/* take ownership of the relevant compute queues */
1779 	amdgpu_gfx_compute_queue_acquire(adev);
1780 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1781 	if (mec_hpd_size) {
1782 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1783 					      AMDGPU_GEM_DOMAIN_VRAM,
1784 					      &adev->gfx.mec.hpd_eop_obj,
1785 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1786 					      (void **)&hpd);
1787 		if (r) {
1788 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1789 			gfx_v9_0_mec_fini(adev);
1790 			return r;
1791 		}
1792 
1793 		memset(hpd, 0, mec_hpd_size);
1794 
1795 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1796 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1797 	}
1798 
1799 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1800 
1801 	fw_data = (const __le32 *)
1802 		(adev->gfx.mec_fw->data +
1803 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1804 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1805 
1806 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1807 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1808 				      &adev->gfx.mec.mec_fw_obj,
1809 				      &adev->gfx.mec.mec_fw_gpu_addr,
1810 				      (void **)&fw);
1811 	if (r) {
1812 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1813 		gfx_v9_0_mec_fini(adev);
1814 		return r;
1815 	}
1816 
1817 	memcpy(fw, fw_data, fw_size);
1818 
1819 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1820 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1821 
1822 	return 0;
1823 }
1824 
1825 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1826 {
1827 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1828 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1829 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1830 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1831 		(SQ_IND_INDEX__FORCE_READ_MASK));
1832 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1833 }
1834 
1835 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1836 			   uint32_t wave, uint32_t thread,
1837 			   uint32_t regno, uint32_t num, uint32_t *out)
1838 {
1839 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1840 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1841 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1842 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1843 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1844 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1845 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1846 	while (num--)
1847 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1848 }
1849 
1850 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1851 {
1852 	/* type 1 wave data */
1853 	dst[(*no_fields)++] = 1;
1854 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1855 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1856 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1857 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1858 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1859 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1860 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1861 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1862 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1863 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1864 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1865 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1866 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1867 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1868 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1869 }
1870 
1871 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1872 				     uint32_t wave, uint32_t start,
1873 				     uint32_t size, uint32_t *dst)
1874 {
1875 	wave_read_regs(
1876 		adev, simd, wave, 0,
1877 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1878 }
1879 
1880 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1881 				     uint32_t wave, uint32_t thread,
1882 				     uint32_t start, uint32_t size,
1883 				     uint32_t *dst)
1884 {
1885 	wave_read_regs(
1886 		adev, simd, wave, thread,
1887 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1888 }
1889 
1890 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1891 				  u32 me, u32 pipe, u32 q, u32 vm)
1892 {
1893 	soc15_grbm_select(adev, me, pipe, q, vm);
1894 }
1895 
1896 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1897         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1898         .select_se_sh = &gfx_v9_0_select_se_sh,
1899         .read_wave_data = &gfx_v9_0_read_wave_data,
1900         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1901         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1902         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1903 };
1904 
1905 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1906 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1907 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1908 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1909 };
1910 
1911 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1912 	.ras_block = {
1913 		.hw_ops = &gfx_v9_0_ras_ops,
1914 	},
1915 };
1916 
1917 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1918 {
1919 	u32 gb_addr_config;
1920 	int err;
1921 
1922 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1923 
1924 	switch (adev->ip_versions[GC_HWIP][0]) {
1925 	case IP_VERSION(9, 0, 1):
1926 		adev->gfx.config.max_hw_contexts = 8;
1927 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1928 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1929 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1930 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1931 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1932 		break;
1933 	case IP_VERSION(9, 2, 1):
1934 		adev->gfx.config.max_hw_contexts = 8;
1935 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1936 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1937 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1938 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1939 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1940 		DRM_INFO("fix gfx.config for vega12\n");
1941 		break;
1942 	case IP_VERSION(9, 4, 0):
1943 		adev->gfx.ras = &gfx_v9_0_ras;
1944 		adev->gfx.config.max_hw_contexts = 8;
1945 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1946 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1947 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1948 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1949 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1950 		gb_addr_config &= ~0xf3e777ff;
1951 		gb_addr_config |= 0x22014042;
1952 		/* check vbios table if gpu info is not available */
1953 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1954 		if (err)
1955 			return err;
1956 		break;
1957 	case IP_VERSION(9, 2, 2):
1958 	case IP_VERSION(9, 1, 0):
1959 		adev->gfx.config.max_hw_contexts = 8;
1960 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1961 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1962 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1963 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1964 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1965 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1966 		else
1967 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1968 		break;
1969 	case IP_VERSION(9, 4, 1):
1970 		adev->gfx.ras = &gfx_v9_4_ras;
1971 		adev->gfx.config.max_hw_contexts = 8;
1972 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1973 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1974 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1975 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1976 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1977 		gb_addr_config &= ~0xf3e777ff;
1978 		gb_addr_config |= 0x22014042;
1979 		break;
1980 	case IP_VERSION(9, 3, 0):
1981 		adev->gfx.config.max_hw_contexts = 8;
1982 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1983 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1984 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1985 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1986 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1987 		gb_addr_config &= ~0xf3e777ff;
1988 		gb_addr_config |= 0x22010042;
1989 		break;
1990 	case IP_VERSION(9, 4, 2):
1991 		adev->gfx.ras = &gfx_v9_4_2_ras;
1992 		adev->gfx.config.max_hw_contexts = 8;
1993 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1994 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1995 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1996 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1997 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1998 		gb_addr_config &= ~0xf3e777ff;
1999 		gb_addr_config |= 0x22014042;
2000 		/* check vbios table if gpu info is not available */
2001 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2002 		if (err)
2003 			return err;
2004 		break;
2005 	default:
2006 		BUG();
2007 		break;
2008 	}
2009 
2010 	if (adev->gfx.ras) {
2011 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2012 		if (err) {
2013 			DRM_ERROR("Failed to register gfx ras block!\n");
2014 			return err;
2015 		}
2016 
2017 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2018 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2019 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2020 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2021 
2022 		/* If not define special ras_late_init function, use gfx default ras_late_init */
2023 		if (!adev->gfx.ras->ras_block.ras_late_init)
2024 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2025 
2026 		/* If not defined special ras_cb function, use default ras_cb */
2027 		if (!adev->gfx.ras->ras_block.ras_cb)
2028 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2029 	}
2030 
2031 	adev->gfx.config.gb_addr_config = gb_addr_config;
2032 
2033 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2034 			REG_GET_FIELD(
2035 					adev->gfx.config.gb_addr_config,
2036 					GB_ADDR_CONFIG,
2037 					NUM_PIPES);
2038 
2039 	adev->gfx.config.max_tile_pipes =
2040 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2041 
2042 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2043 			REG_GET_FIELD(
2044 					adev->gfx.config.gb_addr_config,
2045 					GB_ADDR_CONFIG,
2046 					NUM_BANKS);
2047 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2048 			REG_GET_FIELD(
2049 					adev->gfx.config.gb_addr_config,
2050 					GB_ADDR_CONFIG,
2051 					MAX_COMPRESSED_FRAGS);
2052 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2053 			REG_GET_FIELD(
2054 					adev->gfx.config.gb_addr_config,
2055 					GB_ADDR_CONFIG,
2056 					NUM_RB_PER_SE);
2057 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2058 			REG_GET_FIELD(
2059 					adev->gfx.config.gb_addr_config,
2060 					GB_ADDR_CONFIG,
2061 					NUM_SHADER_ENGINES);
2062 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2063 			REG_GET_FIELD(
2064 					adev->gfx.config.gb_addr_config,
2065 					GB_ADDR_CONFIG,
2066 					PIPE_INTERLEAVE_SIZE));
2067 
2068 	return 0;
2069 }
2070 
2071 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2072 				      int mec, int pipe, int queue)
2073 {
2074 	unsigned irq_type;
2075 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2076 	unsigned int hw_prio;
2077 
2078 	ring = &adev->gfx.compute_ring[ring_id];
2079 
2080 	/* mec0 is me1 */
2081 	ring->me = mec + 1;
2082 	ring->pipe = pipe;
2083 	ring->queue = queue;
2084 
2085 	ring->ring_obj = NULL;
2086 	ring->use_doorbell = true;
2087 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2088 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2089 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2090 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2091 
2092 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2093 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2094 		+ ring->pipe;
2095 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2096 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2097 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2098 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2099 				hw_prio, NULL);
2100 }
2101 
2102 static int gfx_v9_0_sw_init(void *handle)
2103 {
2104 	int i, j, k, r, ring_id;
2105 	struct amdgpu_ring *ring;
2106 	struct amdgpu_kiq *kiq;
2107 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2108 
2109 	switch (adev->ip_versions[GC_HWIP][0]) {
2110 	case IP_VERSION(9, 0, 1):
2111 	case IP_VERSION(9, 2, 1):
2112 	case IP_VERSION(9, 4, 0):
2113 	case IP_VERSION(9, 2, 2):
2114 	case IP_VERSION(9, 1, 0):
2115 	case IP_VERSION(9, 4, 1):
2116 	case IP_VERSION(9, 3, 0):
2117 	case IP_VERSION(9, 4, 2):
2118 		adev->gfx.mec.num_mec = 2;
2119 		break;
2120 	default:
2121 		adev->gfx.mec.num_mec = 1;
2122 		break;
2123 	}
2124 
2125 	adev->gfx.mec.num_pipe_per_mec = 4;
2126 	adev->gfx.mec.num_queue_per_pipe = 8;
2127 
2128 	/* EOP Event */
2129 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2130 	if (r)
2131 		return r;
2132 
2133 	/* Privileged reg */
2134 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2135 			      &adev->gfx.priv_reg_irq);
2136 	if (r)
2137 		return r;
2138 
2139 	/* Privileged inst */
2140 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2141 			      &adev->gfx.priv_inst_irq);
2142 	if (r)
2143 		return r;
2144 
2145 	/* ECC error */
2146 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2147 			      &adev->gfx.cp_ecc_error_irq);
2148 	if (r)
2149 		return r;
2150 
2151 	/* FUE error */
2152 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2153 			      &adev->gfx.cp_ecc_error_irq);
2154 	if (r)
2155 		return r;
2156 
2157 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2158 
2159 	r = gfx_v9_0_init_microcode(adev);
2160 	if (r) {
2161 		DRM_ERROR("Failed to load gfx firmware!\n");
2162 		return r;
2163 	}
2164 
2165 	if (adev->gfx.rlc.funcs) {
2166 		if (adev->gfx.rlc.funcs->init) {
2167 			r = adev->gfx.rlc.funcs->init(adev);
2168 			if (r) {
2169 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2170 				return r;
2171 			}
2172 		}
2173 	}
2174 
2175 	r = gfx_v9_0_mec_init(adev);
2176 	if (r) {
2177 		DRM_ERROR("Failed to init MEC BOs!\n");
2178 		return r;
2179 	}
2180 
2181 	/* set up the gfx ring */
2182 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2183 		ring = &adev->gfx.gfx_ring[i];
2184 		ring->ring_obj = NULL;
2185 		if (!i)
2186 			sprintf(ring->name, "gfx");
2187 		else
2188 			sprintf(ring->name, "gfx_%d", i);
2189 		ring->use_doorbell = true;
2190 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2191 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2192 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2193 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2194 		if (r)
2195 			return r;
2196 	}
2197 
2198 	/* set up the compute queues - allocate horizontally across pipes */
2199 	ring_id = 0;
2200 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2201 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2202 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2203 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2204 					continue;
2205 
2206 				r = gfx_v9_0_compute_ring_init(adev,
2207 							       ring_id,
2208 							       i, k, j);
2209 				if (r)
2210 					return r;
2211 
2212 				ring_id++;
2213 			}
2214 		}
2215 	}
2216 
2217 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2218 	if (r) {
2219 		DRM_ERROR("Failed to init KIQ BOs!\n");
2220 		return r;
2221 	}
2222 
2223 	kiq = &adev->gfx.kiq;
2224 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2225 	if (r)
2226 		return r;
2227 
2228 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2229 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2230 	if (r)
2231 		return r;
2232 
2233 	adev->gfx.ce_ram_size = 0x8000;
2234 
2235 	r = gfx_v9_0_gpu_early_init(adev);
2236 	if (r)
2237 		return r;
2238 
2239 	return 0;
2240 }
2241 
2242 
2243 static int gfx_v9_0_sw_fini(void *handle)
2244 {
2245 	int i;
2246 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2247 
2248 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2249 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2250 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2251 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2252 
2253 	amdgpu_gfx_mqd_sw_fini(adev);
2254 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2255 	amdgpu_gfx_kiq_fini(adev);
2256 
2257 	gfx_v9_0_mec_fini(adev);
2258 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2259 				&adev->gfx.rlc.clear_state_gpu_addr,
2260 				(void **)&adev->gfx.rlc.cs_ptr);
2261 	if (adev->flags & AMD_IS_APU) {
2262 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2263 				&adev->gfx.rlc.cp_table_gpu_addr,
2264 				(void **)&adev->gfx.rlc.cp_table_ptr);
2265 	}
2266 	gfx_v9_0_free_microcode(adev);
2267 
2268 	return 0;
2269 }
2270 
2271 
/*
 * Placeholder for tiling mode table setup.  Currently does nothing.
 */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO: not implemented */
}
2276 
2277 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2278 			   u32 instance)
2279 {
2280 	u32 data;
2281 
2282 	if (instance == 0xffffffff)
2283 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2284 	else
2285 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2286 
2287 	if (se_num == 0xffffffff)
2288 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2289 	else
2290 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2291 
2292 	if (sh_num == 0xffffffff)
2293 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2294 	else
2295 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2296 
2297 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2298 }
2299 
2300 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2301 {
2302 	u32 data, mask;
2303 
2304 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2305 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2306 
2307 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2308 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2309 
2310 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2311 					 adev->gfx.config.max_sh_per_se);
2312 
2313 	return (~data) & mask;
2314 }
2315 
2316 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2317 {
2318 	int i, j;
2319 	u32 data;
2320 	u32 active_rbs = 0;
2321 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2322 					adev->gfx.config.max_sh_per_se;
2323 
2324 	mutex_lock(&adev->grbm_idx_mutex);
2325 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2326 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2327 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2328 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2329 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2330 					       rb_bitmap_width_per_sh);
2331 		}
2332 	}
2333 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2334 	mutex_unlock(&adev->grbm_idx_mutex);
2335 
2336 	adev->gfx.config.backend_enable_mask = active_rbs;
2337 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2338 }
2339 
2340 #define DEFAULT_SH_MEM_BASES	(0x6000)
2341 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2342 {
2343 	int i;
2344 	uint32_t sh_mem_config;
2345 	uint32_t sh_mem_bases;
2346 
2347 	/*
2348 	 * Configure apertures:
2349 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2350 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2351 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2352 	 */
2353 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2354 
2355 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2356 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2357 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2358 
2359 	mutex_lock(&adev->srbm_mutex);
2360 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2361 		soc15_grbm_select(adev, 0, 0, 0, i);
2362 		/* CP and shaders */
2363 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2364 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2365 	}
2366 	soc15_grbm_select(adev, 0, 0, 0, 0);
2367 	mutex_unlock(&adev->srbm_mutex);
2368 
2369 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2370 	   access. These should be enabled by FW for target VMIDs. */
2371 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2372 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2373 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2374 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2375 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2376 	}
2377 }
2378 
2379 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2380 {
2381 	int vmid;
2382 
2383 	/*
2384 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2385 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2386 	 * the driver can enable them for graphics. VMID0 should maintain
2387 	 * access so that HWS firmware can save/restore entries.
2388 	 */
2389 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2390 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2391 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2392 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2393 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2394 	}
2395 }
2396 
2397 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2398 {
2399 	uint32_t tmp;
2400 
2401 	switch (adev->ip_versions[GC_HWIP][0]) {
2402 	case IP_VERSION(9, 4, 1):
2403 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2404 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2405 					DISABLE_BARRIER_WAITCNT, 1);
2406 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2407 		break;
2408 	default:
2409 		break;
2410 	}
2411 }
2412 
2413 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2414 {
2415 	u32 tmp;
2416 	int i;
2417 
2418 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2419 
2420 	gfx_v9_0_tiling_mode_table_init(adev);
2421 
2422 	if (adev->gfx.num_gfx_rings)
2423 		gfx_v9_0_setup_rb(adev);
2424 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2425 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2426 
2427 	/* XXX SH_MEM regs */
2428 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2429 	mutex_lock(&adev->srbm_mutex);
2430 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2431 		soc15_grbm_select(adev, 0, 0, 0, i);
2432 		/* CP and shaders */
2433 		if (i == 0) {
2434 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2435 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2436 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2437 					    !!adev->gmc.noretry);
2438 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2439 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2440 		} else {
2441 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2442 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2443 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2444 					    !!adev->gmc.noretry);
2445 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2446 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2447 				(adev->gmc.private_aperture_start >> 48));
2448 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2449 				(adev->gmc.shared_aperture_start >> 48));
2450 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2451 		}
2452 	}
2453 	soc15_grbm_select(adev, 0, 0, 0, 0);
2454 
2455 	mutex_unlock(&adev->srbm_mutex);
2456 
2457 	gfx_v9_0_init_compute_vmid(adev);
2458 	gfx_v9_0_init_gds_vmid(adev);
2459 	gfx_v9_0_init_sq_config(adev);
2460 }
2461 
2462 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2463 {
2464 	u32 i, j, k;
2465 	u32 mask;
2466 
2467 	mutex_lock(&adev->grbm_idx_mutex);
2468 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2469 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2470 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2471 			for (k = 0; k < adev->usec_timeout; k++) {
2472 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2473 					break;
2474 				udelay(1);
2475 			}
2476 			if (k == adev->usec_timeout) {
2477 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2478 						      0xffffffff, 0xffffffff);
2479 				mutex_unlock(&adev->grbm_idx_mutex);
2480 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2481 					 i, j);
2482 				return;
2483 			}
2484 		}
2485 	}
2486 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2487 	mutex_unlock(&adev->grbm_idx_mutex);
2488 
2489 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2490 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2491 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2492 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2493 	for (k = 0; k < adev->usec_timeout; k++) {
2494 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2495 			break;
2496 		udelay(1);
2497 	}
2498 }
2499 
2500 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2501 					       bool enable)
2502 {
2503 	u32 tmp;
2504 
2505 	/* These interrupts should be enabled to drive DS clock */
2506 
2507 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2508 
2509 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2510 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2511 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2512 	if(adev->gfx.num_gfx_rings)
2513 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2514 
2515 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2516 }
2517 
2518 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2519 {
2520 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2521 	/* csib */
2522 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2523 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2524 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2525 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2526 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2527 			adev->gfx.rlc.clear_state_size);
2528 }
2529 
/*
 * Scan the indirect part of an RLC register list format table.
 *
 * Starting at @indirect_offset the table is treated as a sequence of
 * blocks. Each block is a run of three-word entries terminated by a
 * 0xFFFFFFFF word; the third word of every entry is looked up in
 * @unique_indirect_regs and stored in the first free (zero) slot if it is
 * not present yet.
 *
 * @register_list_format:         table to scan (not modified)
 * @indirect_offset:              index of the first indirect block
 * @list_size:                    number of words in @register_list_format
 * @unique_indirect_regs:         in/out table of distinct third words;
 *                                zero marks a free slot
 * @unique_indirect_reg_count:    capacity of @unique_indirect_regs
 * @indirect_start_offsets:       out: start index of every block found
 * @indirect_start_offsets_count: in/out: number of valid entries in
 *                                @indirect_start_offsets
 * @max_start_offsets_count:      capacity of @indirect_start_offsets
 *
 * If more blocks are found than @indirect_start_offsets can hold, a
 * warning is raised and the extra start offsets are dropped instead of
 * being written past the end of the array. Running out of unique register
 * slots still triggers BUG_ON().
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/*
		 * Record the start of this block, but never store beyond the
		 * caller's array: the table comes from firmware and may
		 * contain more blocks than expected.
		 */
		if (!WARN_ON(*indirect_start_offsets_count >=
			     max_start_offsets_count)) {
			indirect_start_offsets[*indirect_start_offsets_count] =
				indirect_offset;
			*indirect_start_offsets_count =
				*indirect_start_offsets_count + 1;
		}

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words of the entry */
			indirect_offset += 2;

			/* look for the matching index, or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			/* a free slot was hit first: claim it for this word */
			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2566 
2567 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2568 {
2569 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2570 	int unique_indirect_reg_count = 0;
2571 
2572 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2573 	int indirect_start_offsets_count = 0;
2574 
2575 	int list_size = 0;
2576 	int i = 0, j = 0;
2577 	u32 tmp = 0;
2578 
2579 	u32 *register_list_format =
2580 		kmemdup(adev->gfx.rlc.register_list_format,
2581 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2582 	if (!register_list_format)
2583 		return -ENOMEM;
2584 
2585 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2586 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2587 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2588 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2589 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2590 				    unique_indirect_regs,
2591 				    unique_indirect_reg_count,
2592 				    indirect_start_offsets,
2593 				    &indirect_start_offsets_count,
2594 				    ARRAY_SIZE(indirect_start_offsets));
2595 
2596 	/* enable auto inc in case it is disabled */
2597 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2598 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2599 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2600 
2601 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2602 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2603 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2604 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2605 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2606 			adev->gfx.rlc.register_restore[i]);
2607 
2608 	/* load indirect register */
2609 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2610 		adev->gfx.rlc.reg_list_format_start);
2611 
2612 	/* direct register portion */
2613 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2614 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2615 			register_list_format[i]);
2616 
2617 	/* indirect register portion */
2618 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2619 		if (register_list_format[i] == 0xFFFFFFFF) {
2620 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2621 			continue;
2622 		}
2623 
2624 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2625 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2626 
2627 		for (j = 0; j < unique_indirect_reg_count; j++) {
2628 			if (register_list_format[i] == unique_indirect_regs[j]) {
2629 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2630 				break;
2631 			}
2632 		}
2633 
2634 		BUG_ON(j >= unique_indirect_reg_count);
2635 
2636 		i++;
2637 	}
2638 
2639 	/* set save/restore list size */
2640 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2641 	list_size = list_size >> 1;
2642 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2643 		adev->gfx.rlc.reg_restore_list_size);
2644 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2645 
2646 	/* write the starting offsets to RLC scratch ram */
2647 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2648 		adev->gfx.rlc.starting_offsets_start);
2649 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2650 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2651 		       indirect_start_offsets[i]);
2652 
2653 	/* load unique indirect regs*/
2654 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2655 		if (unique_indirect_regs[i] != 0) {
2656 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2657 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2658 			       unique_indirect_regs[i] & 0x3FFFF);
2659 
2660 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2661 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2662 			       unique_indirect_regs[i] >> 20);
2663 		}
2664 	}
2665 
2666 	kfree(register_list_format);
2667 	return 0;
2668 }
2669 
2670 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2671 {
2672 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2673 }
2674 
2675 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2676 					     bool enable)
2677 {
2678 	uint32_t data = 0;
2679 	uint32_t default_data = 0;
2680 
2681 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2682 	if (enable) {
2683 		/* enable GFXIP control over CGPG */
2684 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2685 		if(default_data != data)
2686 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2687 
2688 		/* update status */
2689 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2690 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2691 		if(default_data != data)
2692 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2693 	} else {
2694 		/* restore GFXIP control over GCPG */
2695 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2696 		if(default_data != data)
2697 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2698 	}
2699 }
2700 
2701 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2702 {
2703 	uint32_t data = 0;
2704 
2705 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2706 			      AMD_PG_SUPPORT_GFX_SMG |
2707 			      AMD_PG_SUPPORT_GFX_DMG)) {
2708 		/* init IDLE_POLL_COUNT = 60 */
2709 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2710 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2711 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2712 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2713 
2714 		/* init RLC PG Delay */
2715 		data = 0;
2716 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2717 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2718 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2719 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2720 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2721 
2722 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2723 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2724 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2725 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2726 
2727 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2728 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2729 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2730 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2731 
2732 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2733 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2734 
2735 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2736 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2737 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2738 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2739 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2740 	}
2741 }
2742 
2743 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2744 						bool enable)
2745 {
2746 	uint32_t data = 0;
2747 	uint32_t default_data = 0;
2748 
2749 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2750 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2751 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2752 			     enable ? 1 : 0);
2753 	if (default_data != data)
2754 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2755 }
2756 
2757 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2758 						bool enable)
2759 {
2760 	uint32_t data = 0;
2761 	uint32_t default_data = 0;
2762 
2763 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2764 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2765 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2766 			     enable ? 1 : 0);
2767 	if(default_data != data)
2768 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2769 }
2770 
2771 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2772 					bool enable)
2773 {
2774 	uint32_t data = 0;
2775 	uint32_t default_data = 0;
2776 
2777 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2778 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2779 			     CP_PG_DISABLE,
2780 			     enable ? 0 : 1);
2781 	if(default_data != data)
2782 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2783 }
2784 
2785 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2786 						bool enable)
2787 {
2788 	uint32_t data, default_data;
2789 
2790 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2791 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2792 			     GFX_POWER_GATING_ENABLE,
2793 			     enable ? 1 : 0);
2794 	if(default_data != data)
2795 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2796 }
2797 
2798 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2799 						bool enable)
2800 {
2801 	uint32_t data, default_data;
2802 
2803 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2804 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2805 			     GFX_PIPELINE_PG_ENABLE,
2806 			     enable ? 1 : 0);
2807 	if(default_data != data)
2808 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2809 
2810 	if (!enable)
2811 		/* read any GFX register to wake up GFX */
2812 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2813 }
2814 
2815 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2816 						       bool enable)
2817 {
2818 	uint32_t data, default_data;
2819 
2820 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2821 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2822 			     STATIC_PER_CU_PG_ENABLE,
2823 			     enable ? 1 : 0);
2824 	if(default_data != data)
2825 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2826 }
2827 
2828 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2829 						bool enable)
2830 {
2831 	uint32_t data, default_data;
2832 
2833 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2834 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2835 			     DYN_PER_CU_PG_ENABLE,
2836 			     enable ? 1 : 0);
2837 	if(default_data != data)
2838 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2839 }
2840 
2841 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2842 {
2843 	gfx_v9_0_init_csb(adev);
2844 
2845 	/*
2846 	 * Rlc save restore list is workable since v2_1.
2847 	 * And it's needed by gfxoff feature.
2848 	 */
2849 	if (adev->gfx.rlc.is_rlc_v2_1) {
2850 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2851 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2852 			gfx_v9_1_init_rlc_save_restore_list(adev);
2853 		gfx_v9_0_enable_save_restore_machine(adev);
2854 	}
2855 
2856 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2857 			      AMD_PG_SUPPORT_GFX_SMG |
2858 			      AMD_PG_SUPPORT_GFX_DMG |
2859 			      AMD_PG_SUPPORT_CP |
2860 			      AMD_PG_SUPPORT_GDS |
2861 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2862 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2863 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2864 		gfx_v9_0_init_gfx_power_gating(adev);
2865 	}
2866 }
2867 
2868 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2869 {
2870 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2871 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2872 	gfx_v9_0_wait_for_rlc_serdes(adev);
2873 }
2874 
2875 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2876 {
2877 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2878 	udelay(50);
2879 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2880 	udelay(50);
2881 }
2882 
2883 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2884 {
2885 #ifdef AMDGPU_RLC_DEBUG_RETRY
2886 	u32 rlc_ucode_ver;
2887 #endif
2888 
2889 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2890 	udelay(50);
2891 
2892 	/* carrizo do enable cp interrupt after cp inited */
2893 	if (!(adev->flags & AMD_IS_APU)) {
2894 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2895 		udelay(50);
2896 	}
2897 
2898 #ifdef AMDGPU_RLC_DEBUG_RETRY
2899 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2900 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2901 	if(rlc_ucode_ver == 0x108) {
2902 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2903 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2904 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2905 		 * default is 0x9C4 to create a 100us interval */
2906 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2907 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2908 		 * to disable the page fault retry interrupts, default is
2909 		 * 0x100 (256) */
2910 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2911 	}
2912 #endif
2913 }
2914 
2915 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2916 {
2917 	const struct rlc_firmware_header_v2_0 *hdr;
2918 	const __le32 *fw_data;
2919 	unsigned i, fw_size;
2920 
2921 	if (!adev->gfx.rlc_fw)
2922 		return -EINVAL;
2923 
2924 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2925 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2926 
2927 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2928 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2929 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2930 
2931 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2932 			RLCG_UCODE_LOADING_START_ADDRESS);
2933 	for (i = 0; i < fw_size; i++)
2934 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2935 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2936 
2937 	return 0;
2938 }
2939 
2940 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2941 {
2942 	int r;
2943 
2944 	if (amdgpu_sriov_vf(adev)) {
2945 		gfx_v9_0_init_csb(adev);
2946 		return 0;
2947 	}
2948 
2949 	adev->gfx.rlc.funcs->stop(adev);
2950 
2951 	/* disable CG */
2952 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2953 
2954 	gfx_v9_0_init_pg(adev);
2955 
2956 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2957 		/* legacy rlc firmware loading */
2958 		r = gfx_v9_0_rlc_load_microcode(adev);
2959 		if (r)
2960 			return r;
2961 	}
2962 
2963 	switch (adev->ip_versions[GC_HWIP][0]) {
2964 	case IP_VERSION(9, 2, 2):
2965 	case IP_VERSION(9, 1, 0):
2966 		if (amdgpu_lbpw == 0)
2967 			gfx_v9_0_enable_lbpw(adev, false);
2968 		else
2969 			gfx_v9_0_enable_lbpw(adev, true);
2970 		break;
2971 	case IP_VERSION(9, 4, 0):
2972 		if (amdgpu_lbpw > 0)
2973 			gfx_v9_0_enable_lbpw(adev, true);
2974 		else
2975 			gfx_v9_0_enable_lbpw(adev, false);
2976 		break;
2977 	default:
2978 		break;
2979 	}
2980 
2981 	adev->gfx.rlc.funcs->start(adev);
2982 
2983 	return 0;
2984 }
2985 
2986 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2987 {
2988 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2989 
2990 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2991 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2992 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2993 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2994 	udelay(50);
2995 }
2996 
2997 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2998 {
2999 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3000 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3001 	const struct gfx_firmware_header_v1_0 *me_hdr;
3002 	const __le32 *fw_data;
3003 	unsigned i, fw_size;
3004 
3005 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3006 		return -EINVAL;
3007 
3008 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3009 		adev->gfx.pfp_fw->data;
3010 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3011 		adev->gfx.ce_fw->data;
3012 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3013 		adev->gfx.me_fw->data;
3014 
3015 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3016 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3017 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3018 
3019 	gfx_v9_0_cp_gfx_enable(adev, false);
3020 
3021 	/* PFP */
3022 	fw_data = (const __le32 *)
3023 		(adev->gfx.pfp_fw->data +
3024 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3025 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3026 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3027 	for (i = 0; i < fw_size; i++)
3028 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3029 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3030 
3031 	/* CE */
3032 	fw_data = (const __le32 *)
3033 		(adev->gfx.ce_fw->data +
3034 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3035 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3036 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3037 	for (i = 0; i < fw_size; i++)
3038 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3039 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3040 
3041 	/* ME */
3042 	fw_data = (const __le32 *)
3043 		(adev->gfx.me_fw->data +
3044 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3045 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3046 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3047 	for (i = 0; i < fw_size; i++)
3048 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3049 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3050 
3051 	return 0;
3052 }
3053 
3054 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3055 {
3056 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3057 	const struct cs_section_def *sect = NULL;
3058 	const struct cs_extent_def *ext = NULL;
3059 	int r, i, tmp;
3060 
3061 	/* init the CP */
3062 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3063 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3064 
3065 	gfx_v9_0_cp_gfx_enable(adev, true);
3066 
3067 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3068 	if (r) {
3069 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3070 		return r;
3071 	}
3072 
3073 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3074 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3075 
3076 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3077 	amdgpu_ring_write(ring, 0x80000000);
3078 	amdgpu_ring_write(ring, 0x80000000);
3079 
3080 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3081 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3082 			if (sect->id == SECT_CONTEXT) {
3083 				amdgpu_ring_write(ring,
3084 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3085 					       ext->reg_count));
3086 				amdgpu_ring_write(ring,
3087 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3088 				for (i = 0; i < ext->reg_count; i++)
3089 					amdgpu_ring_write(ring, ext->extent[i]);
3090 			}
3091 		}
3092 	}
3093 
3094 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3095 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3096 
3097 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3098 	amdgpu_ring_write(ring, 0);
3099 
3100 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3101 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3102 	amdgpu_ring_write(ring, 0x8000);
3103 	amdgpu_ring_write(ring, 0x8000);
3104 
3105 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3106 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3107 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3108 	amdgpu_ring_write(ring, tmp);
3109 	amdgpu_ring_write(ring, 0);
3110 
3111 	amdgpu_ring_commit(ring);
3112 
3113 	return 0;
3114 }
3115 
3116 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3117 {
3118 	struct amdgpu_ring *ring;
3119 	u32 tmp;
3120 	u32 rb_bufsz;
3121 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3122 
3123 	/* Set the write pointer delay */
3124 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3125 
3126 	/* set the RB to use vmid 0 */
3127 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3128 
3129 	/* Set ring buffer size */
3130 	ring = &adev->gfx.gfx_ring[0];
3131 	rb_bufsz = order_base_2(ring->ring_size / 8);
3132 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3133 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3134 #ifdef __BIG_ENDIAN
3135 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3136 #endif
3137 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3138 
3139 	/* Initialize the ring buffer's write pointers */
3140 	ring->wptr = 0;
3141 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3142 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3143 
3144 	/* set the wb address wether it's enabled or not */
3145 	rptr_addr = ring->rptr_gpu_addr;
3146 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3147 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3148 
3149 	wptr_gpu_addr = ring->wptr_gpu_addr;
3150 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3151 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3152 
3153 	mdelay(1);
3154 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3155 
3156 	rb_addr = ring->gpu_addr >> 8;
3157 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3158 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3159 
3160 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3161 	if (ring->use_doorbell) {
3162 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3163 				    DOORBELL_OFFSET, ring->doorbell_index);
3164 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3165 				    DOORBELL_EN, 1);
3166 	} else {
3167 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3168 	}
3169 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3170 
3171 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3172 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3173 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3174 
3175 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3176 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3177 
3178 
3179 	/* start the ring */
3180 	gfx_v9_0_cp_gfx_start(adev);
3181 	ring->sched.ready = true;
3182 
3183 	return 0;
3184 }
3185 
3186 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3187 {
3188 	if (enable) {
3189 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3190 	} else {
3191 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3192 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3193 		adev->gfx.kiq.ring.sched.ready = false;
3194 	}
3195 	udelay(50);
3196 }
3197 
3198 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3199 {
3200 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3201 	const __le32 *fw_data;
3202 	unsigned i;
3203 	u32 tmp;
3204 
3205 	if (!adev->gfx.mec_fw)
3206 		return -EINVAL;
3207 
3208 	gfx_v9_0_cp_compute_enable(adev, false);
3209 
3210 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3211 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3212 
3213 	fw_data = (const __le32 *)
3214 		(adev->gfx.mec_fw->data +
3215 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3216 	tmp = 0;
3217 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3218 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3219 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3220 
3221 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3222 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3223 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3224 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3225 
3226 	/* MEC1 */
3227 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3228 			 mec_hdr->jt_offset);
3229 	for (i = 0; i < mec_hdr->jt_size; i++)
3230 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3231 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3232 
3233 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3234 			adev->gfx.mec_fw_version);
3235 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3236 
3237 	return 0;
3238 }
3239 
3240 /* KIQ functions */
3241 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3242 {
3243 	uint32_t tmp;
3244 	struct amdgpu_device *adev = ring->adev;
3245 
3246 	/* tell RLC which is KIQ queue */
3247 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3248 	tmp &= 0xffffff00;
3249 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3250 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3251 	tmp |= 0x80;
3252 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3253 }
3254 
3255 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3256 {
3257 	struct amdgpu_device *adev = ring->adev;
3258 
3259 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3260 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3261 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3262 			mqd->cp_hqd_queue_priority =
3263 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3264 		}
3265 	}
3266 }
3267 
3268 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3269 {
3270 	struct amdgpu_device *adev = ring->adev;
3271 	struct v9_mqd *mqd = ring->mqd_ptr;
3272 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3273 	uint32_t tmp;
3274 
3275 	mqd->header = 0xC0310800;
3276 	mqd->compute_pipelinestat_enable = 0x00000001;
3277 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3278 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3279 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3280 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3281 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3282 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3283 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3284 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3285 	mqd->compute_misc_reserved = 0x00000003;
3286 
3287 	mqd->dynamic_cu_mask_addr_lo =
3288 		lower_32_bits(ring->mqd_gpu_addr
3289 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3290 	mqd->dynamic_cu_mask_addr_hi =
3291 		upper_32_bits(ring->mqd_gpu_addr
3292 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3293 
3294 	eop_base_addr = ring->eop_gpu_addr >> 8;
3295 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3296 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3297 
3298 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3299 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3300 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3301 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3302 
3303 	mqd->cp_hqd_eop_control = tmp;
3304 
3305 	/* enable doorbell? */
3306 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3307 
3308 	if (ring->use_doorbell) {
3309 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3310 				    DOORBELL_OFFSET, ring->doorbell_index);
3311 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3312 				    DOORBELL_EN, 1);
3313 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3314 				    DOORBELL_SOURCE, 0);
3315 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3316 				    DOORBELL_HIT, 0);
3317 	} else {
3318 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3319 					 DOORBELL_EN, 0);
3320 	}
3321 
3322 	mqd->cp_hqd_pq_doorbell_control = tmp;
3323 
3324 	/* disable the queue if it's active */
3325 	ring->wptr = 0;
3326 	mqd->cp_hqd_dequeue_request = 0;
3327 	mqd->cp_hqd_pq_rptr = 0;
3328 	mqd->cp_hqd_pq_wptr_lo = 0;
3329 	mqd->cp_hqd_pq_wptr_hi = 0;
3330 
3331 	/* set the pointer to the MQD */
3332 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3333 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3334 
3335 	/* set MQD vmid to 0 */
3336 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3337 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3338 	mqd->cp_mqd_control = tmp;
3339 
3340 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3341 	hqd_gpu_addr = ring->gpu_addr >> 8;
3342 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3343 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3344 
3345 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3346 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3347 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3348 			    (order_base_2(ring->ring_size / 4) - 1));
3349 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3350 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3351 #ifdef __BIG_ENDIAN
3352 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3353 #endif
3354 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3355 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3356 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3357 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3358 	mqd->cp_hqd_pq_control = tmp;
3359 
3360 	/* set the wb address whether it's enabled or not */
3361 	wb_gpu_addr = ring->rptr_gpu_addr;
3362 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3363 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3364 		upper_32_bits(wb_gpu_addr) & 0xffff;
3365 
3366 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3367 	wb_gpu_addr = ring->wptr_gpu_addr;
3368 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3369 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3370 
3371 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3372 	ring->wptr = 0;
3373 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3374 
3375 	/* set the vmid for the queue */
3376 	mqd->cp_hqd_vmid = 0;
3377 
3378 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3379 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3380 	mqd->cp_hqd_persistent_state = tmp;
3381 
3382 	/* set MIN_IB_AVAIL_SIZE */
3383 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3384 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3385 	mqd->cp_hqd_ib_control = tmp;
3386 
3387 	/* set static priority for a queue/ring */
3388 	gfx_v9_0_mqd_set_priority(ring, mqd);
3389 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3390 
3391 	/* map_queues packet doesn't need activate the queue,
3392 	 * so only kiq need set this field.
3393 	 */
3394 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3395 		mqd->cp_hqd_active = 1;
3396 
3397 	return 0;
3398 }
3399 
3400 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3401 {
3402 	struct amdgpu_device *adev = ring->adev;
3403 	struct v9_mqd *mqd = ring->mqd_ptr;
3404 	int j;
3405 
3406 	/* disable wptr polling */
3407 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3408 
3409 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3410 	       mqd->cp_hqd_eop_base_addr_lo);
3411 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3412 	       mqd->cp_hqd_eop_base_addr_hi);
3413 
3414 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3415 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3416 	       mqd->cp_hqd_eop_control);
3417 
3418 	/* enable doorbell? */
3419 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3420 	       mqd->cp_hqd_pq_doorbell_control);
3421 
3422 	/* disable the queue if it's active */
3423 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3424 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3425 		for (j = 0; j < adev->usec_timeout; j++) {
3426 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3427 				break;
3428 			udelay(1);
3429 		}
3430 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3431 		       mqd->cp_hqd_dequeue_request);
3432 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3433 		       mqd->cp_hqd_pq_rptr);
3434 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3435 		       mqd->cp_hqd_pq_wptr_lo);
3436 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3437 		       mqd->cp_hqd_pq_wptr_hi);
3438 	}
3439 
3440 	/* set the pointer to the MQD */
3441 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3442 	       mqd->cp_mqd_base_addr_lo);
3443 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3444 	       mqd->cp_mqd_base_addr_hi);
3445 
3446 	/* set MQD vmid to 0 */
3447 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3448 	       mqd->cp_mqd_control);
3449 
3450 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3451 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3452 	       mqd->cp_hqd_pq_base_lo);
3453 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3454 	       mqd->cp_hqd_pq_base_hi);
3455 
3456 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3457 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3458 	       mqd->cp_hqd_pq_control);
3459 
3460 	/* set the wb address whether it's enabled or not */
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3462 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3463 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3464 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3465 
3466 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3468 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3469 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3470 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3471 
3472 	/* enable the doorbell if requested */
3473 	if (ring->use_doorbell) {
3474 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3475 					(adev->doorbell_index.kiq * 2) << 2);
3476 		/* If GC has entered CGPG, ringing doorbell > first page
3477 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3478 		 * workaround this issue. And this change has to align with firmware
3479 		 * update.
3480 		 */
3481 		if (check_if_enlarge_doorbell_range(adev))
3482 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3483 					(adev->doorbell.size - 4));
3484 		else
3485 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3486 					(adev->doorbell_index.userqueue_end * 2) << 2);
3487 	}
3488 
3489 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3490 	       mqd->cp_hqd_pq_doorbell_control);
3491 
3492 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3493 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3494 	       mqd->cp_hqd_pq_wptr_lo);
3495 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3496 	       mqd->cp_hqd_pq_wptr_hi);
3497 
3498 	/* set the vmid for the queue */
3499 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3500 
3501 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3502 	       mqd->cp_hqd_persistent_state);
3503 
3504 	/* activate the queue */
3505 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3506 	       mqd->cp_hqd_active);
3507 
3508 	if (ring->use_doorbell)
3509 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3510 
3511 	return 0;
3512 }
3513 
3514 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3515 {
3516 	struct amdgpu_device *adev = ring->adev;
3517 	int j;
3518 
3519 	/* disable the queue if it's active */
3520 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3521 
3522 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3523 
3524 		for (j = 0; j < adev->usec_timeout; j++) {
3525 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3526 				break;
3527 			udelay(1);
3528 		}
3529 
3530 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3531 			DRM_DEBUG("KIQ dequeue request failed.\n");
3532 
3533 			/* Manual disable if dequeue request times out */
3534 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3535 		}
3536 
3537 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3538 		      0);
3539 	}
3540 
3541 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3542 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3543 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3544 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3545 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3546 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3547 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3548 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3549 
3550 	return 0;
3551 }
3552 
3553 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3554 {
3555 	struct amdgpu_device *adev = ring->adev;
3556 	struct v9_mqd *mqd = ring->mqd_ptr;
3557 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3558 	struct v9_mqd *tmp_mqd;
3559 
3560 	gfx_v9_0_kiq_setting(ring);
3561 
3562 	/* GPU could be in bad state during probe, driver trigger the reset
3563 	 * after load the SMU, in this case , the mqd is not be initialized.
3564 	 * driver need to re-init the mqd.
3565 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3566 	 */
3567 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3568 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3569 		/* for GPU_RESET case , reset MQD to a clean status */
3570 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3571 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3572 
3573 		/* reset ring buffer */
3574 		ring->wptr = 0;
3575 		amdgpu_ring_clear_ring(ring);
3576 
3577 		mutex_lock(&adev->srbm_mutex);
3578 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3579 		gfx_v9_0_kiq_init_register(ring);
3580 		soc15_grbm_select(adev, 0, 0, 0, 0);
3581 		mutex_unlock(&adev->srbm_mutex);
3582 	} else {
3583 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3584 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3585 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3586 		mutex_lock(&adev->srbm_mutex);
3587 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3588 		gfx_v9_0_mqd_init(ring);
3589 		gfx_v9_0_kiq_init_register(ring);
3590 		soc15_grbm_select(adev, 0, 0, 0, 0);
3591 		mutex_unlock(&adev->srbm_mutex);
3592 
3593 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3594 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3595 	}
3596 
3597 	return 0;
3598 }
3599 
3600 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3601 {
3602 	struct amdgpu_device *adev = ring->adev;
3603 	struct v9_mqd *mqd = ring->mqd_ptr;
3604 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3605 	struct v9_mqd *tmp_mqd;
3606 
3607 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3608 	 * is not be initialized before
3609 	 */
3610 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3611 
3612 	if (!tmp_mqd->cp_hqd_pq_control ||
3613 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3614 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3615 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3616 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3617 		mutex_lock(&adev->srbm_mutex);
3618 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3619 		gfx_v9_0_mqd_init(ring);
3620 		soc15_grbm_select(adev, 0, 0, 0, 0);
3621 		mutex_unlock(&adev->srbm_mutex);
3622 
3623 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3624 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3625 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3626 		/* reset MQD to a clean status */
3627 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3628 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3629 
3630 		/* reset ring buffer */
3631 		ring->wptr = 0;
3632 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3633 		amdgpu_ring_clear_ring(ring);
3634 	} else {
3635 		amdgpu_ring_clear_ring(ring);
3636 	}
3637 
3638 	return 0;
3639 }
3640 
3641 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3642 {
3643 	struct amdgpu_ring *ring;
3644 	int r;
3645 
3646 	ring = &adev->gfx.kiq.ring;
3647 
3648 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3649 	if (unlikely(r != 0))
3650 		return r;
3651 
3652 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3653 	if (unlikely(r != 0))
3654 		return r;
3655 
3656 	gfx_v9_0_kiq_init_queue(ring);
3657 	amdgpu_bo_kunmap(ring->mqd_obj);
3658 	ring->mqd_ptr = NULL;
3659 	amdgpu_bo_unreserve(ring->mqd_obj);
3660 	ring->sched.ready = true;
3661 	return 0;
3662 }
3663 
3664 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3665 {
3666 	struct amdgpu_ring *ring = NULL;
3667 	int r = 0, i;
3668 
3669 	gfx_v9_0_cp_compute_enable(adev, true);
3670 
3671 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3672 		ring = &adev->gfx.compute_ring[i];
3673 
3674 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3675 		if (unlikely(r != 0))
3676 			goto done;
3677 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3678 		if (!r) {
3679 			r = gfx_v9_0_kcq_init_queue(ring);
3680 			amdgpu_bo_kunmap(ring->mqd_obj);
3681 			ring->mqd_ptr = NULL;
3682 		}
3683 		amdgpu_bo_unreserve(ring->mqd_obj);
3684 		if (r)
3685 			goto done;
3686 	}
3687 
3688 	r = amdgpu_gfx_enable_kcq(adev);
3689 done:
3690 	return r;
3691 }
3692 
3693 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3694 {
3695 	int r, i;
3696 	struct amdgpu_ring *ring;
3697 
3698 	if (!(adev->flags & AMD_IS_APU))
3699 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3700 
3701 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3702 		if (adev->gfx.num_gfx_rings) {
3703 			/* legacy firmware loading */
3704 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3705 			if (r)
3706 				return r;
3707 		}
3708 
3709 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3710 		if (r)
3711 			return r;
3712 	}
3713 
3714 	r = gfx_v9_0_kiq_resume(adev);
3715 	if (r)
3716 		return r;
3717 
3718 	if (adev->gfx.num_gfx_rings) {
3719 		r = gfx_v9_0_cp_gfx_resume(adev);
3720 		if (r)
3721 			return r;
3722 	}
3723 
3724 	r = gfx_v9_0_kcq_resume(adev);
3725 	if (r)
3726 		return r;
3727 
3728 	if (adev->gfx.num_gfx_rings) {
3729 		ring = &adev->gfx.gfx_ring[0];
3730 		r = amdgpu_ring_test_helper(ring);
3731 		if (r)
3732 			return r;
3733 	}
3734 
3735 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3736 		ring = &adev->gfx.compute_ring[i];
3737 		amdgpu_ring_test_helper(ring);
3738 	}
3739 
3740 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3741 
3742 	return 0;
3743 }
3744 
3745 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3746 {
3747 	u32 tmp;
3748 
3749 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3750 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3751 		return;
3752 
3753 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3754 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3755 				adev->df.hash_status.hash_64k);
3756 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3757 				adev->df.hash_status.hash_2m);
3758 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3759 				adev->df.hash_status.hash_1g);
3760 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3761 }
3762 
3763 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3764 {
3765 	if (adev->gfx.num_gfx_rings)
3766 		gfx_v9_0_cp_gfx_enable(adev, enable);
3767 	gfx_v9_0_cp_compute_enable(adev, enable);
3768 }
3769 
3770 static int gfx_v9_0_hw_init(void *handle)
3771 {
3772 	int r;
3773 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3774 
3775 	if (!amdgpu_sriov_vf(adev))
3776 		gfx_v9_0_init_golden_registers(adev);
3777 
3778 	gfx_v9_0_constants_init(adev);
3779 
3780 	gfx_v9_0_init_tcp_config(adev);
3781 
3782 	r = adev->gfx.rlc.funcs->resume(adev);
3783 	if (r)
3784 		return r;
3785 
3786 	r = gfx_v9_0_cp_resume(adev);
3787 	if (r)
3788 		return r;
3789 
3790 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3791 		gfx_v9_4_2_set_power_brake_sequence(adev);
3792 
3793 	return r;
3794 }
3795 
3796 static int gfx_v9_0_hw_fini(void *handle)
3797 {
3798 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3799 
3800 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3801 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3802 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3803 
3804 	/* DF freeze and kcq disable will fail */
3805 	if (!amdgpu_ras_intr_triggered())
3806 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3807 		amdgpu_gfx_disable_kcq(adev);
3808 
3809 	if (amdgpu_sriov_vf(adev)) {
3810 		gfx_v9_0_cp_gfx_enable(adev, false);
3811 		/* must disable polling for SRIOV when hw finished, otherwise
3812 		 * CPC engine may still keep fetching WB address which is already
3813 		 * invalid after sw finished and trigger DMAR reading error in
3814 		 * hypervisor side.
3815 		 */
3816 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3817 		return 0;
3818 	}
3819 
3820 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3821 	 * otherwise KIQ is hanging when binding back
3822 	 */
3823 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3824 		mutex_lock(&adev->srbm_mutex);
3825 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3826 				adev->gfx.kiq.ring.pipe,
3827 				adev->gfx.kiq.ring.queue, 0);
3828 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3829 		soc15_grbm_select(adev, 0, 0, 0, 0);
3830 		mutex_unlock(&adev->srbm_mutex);
3831 	}
3832 
3833 	gfx_v9_0_cp_enable(adev, false);
3834 
3835 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3836 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3837 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3838 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3839 		return 0;
3840 	}
3841 
3842 	adev->gfx.rlc.funcs->stop(adev);
3843 	return 0;
3844 }
3845 
/*
 * Suspend callback: identical to the hardware teardown path.
 *
 * @handle: amdgpu device, passed as an opaque pointer
 *
 * Returns the result of gfx_v9_0_hw_fini().
 */
static int gfx_v9_0_suspend(void *handle)
{
	int ret = gfx_v9_0_hw_fini(handle);

	return ret;
}
3850 
/*
 * Resume callback: identical to the hardware init path.
 *
 * @handle: amdgpu device, passed as an opaque pointer
 *
 * Returns the result of gfx_v9_0_hw_init().
 */
static int gfx_v9_0_resume(void *handle)
{
	int ret = gfx_v9_0_hw_init(handle);

	return ret;
}
3855 
3856 static bool gfx_v9_0_is_idle(void *handle)
3857 {
3858 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3859 
3860 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3861 				GRBM_STATUS, GUI_ACTIVE))
3862 		return false;
3863 	else
3864 		return true;
3865 }
3866 
3867 static int gfx_v9_0_wait_for_idle(void *handle)
3868 {
3869 	unsigned i;
3870 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3871 
3872 	for (i = 0; i < adev->usec_timeout; i++) {
3873 		if (gfx_v9_0_is_idle(handle))
3874 			return 0;
3875 		udelay(1);
3876 	}
3877 	return -ETIMEDOUT;
3878 }
3879 
3880 static int gfx_v9_0_soft_reset(void *handle)
3881 {
3882 	u32 grbm_soft_reset = 0;
3883 	u32 tmp;
3884 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3885 
3886 	/* GRBM_STATUS */
3887 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3888 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3889 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3890 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3891 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3892 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3893 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3894 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3895 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3896 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3897 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3898 	}
3899 
3900 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3901 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3902 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3903 	}
3904 
3905 	/* GRBM_STATUS2 */
3906 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3907 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3908 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3909 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3910 
3911 
3912 	if (grbm_soft_reset) {
3913 		/* stop the rlc */
3914 		adev->gfx.rlc.funcs->stop(adev);
3915 
3916 		if (adev->gfx.num_gfx_rings)
3917 			/* Disable GFX parsing/prefetching */
3918 			gfx_v9_0_cp_gfx_enable(adev, false);
3919 
3920 		/* Disable MEC parsing/prefetching */
3921 		gfx_v9_0_cp_compute_enable(adev, false);
3922 
3923 		if (grbm_soft_reset) {
3924 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3925 			tmp |= grbm_soft_reset;
3926 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3927 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3928 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3929 
3930 			udelay(50);
3931 
3932 			tmp &= ~grbm_soft_reset;
3933 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3934 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3935 		}
3936 
3937 		/* Wait a little for things to settle down */
3938 		udelay(50);
3939 	}
3940 	return 0;
3941 }
3942 
3943 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3944 {
3945 	signed long r, cnt = 0;
3946 	unsigned long flags;
3947 	uint32_t seq, reg_val_offs = 0;
3948 	uint64_t value = 0;
3949 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3950 	struct amdgpu_ring *ring = &kiq->ring;
3951 
3952 	BUG_ON(!ring->funcs->emit_rreg);
3953 
3954 	spin_lock_irqsave(&kiq->ring_lock, flags);
3955 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3956 		pr_err("critical bug! too many kiq readers\n");
3957 		goto failed_unlock;
3958 	}
3959 	amdgpu_ring_alloc(ring, 32);
3960 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3961 	amdgpu_ring_write(ring, 9 |	/* src: register*/
3962 				(5 << 8) |	/* dst: memory */
3963 				(1 << 16) |	/* count sel */
3964 				(1 << 20));	/* write confirm */
3965 	amdgpu_ring_write(ring, 0);
3966 	amdgpu_ring_write(ring, 0);
3967 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3968 				reg_val_offs * 4));
3969 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3970 				reg_val_offs * 4));
3971 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3972 	if (r)
3973 		goto failed_undo;
3974 
3975 	amdgpu_ring_commit(ring);
3976 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3977 
3978 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3979 
3980 	/* don't wait anymore for gpu reset case because this way may
3981 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
3982 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
3983 	 * never return if we keep waiting in virt_kiq_rreg, which cause
3984 	 * gpu_recover() hang there.
3985 	 *
3986 	 * also don't wait anymore for IRQ context
3987 	 * */
3988 	if (r < 1 && (amdgpu_in_reset(adev)))
3989 		goto failed_kiq_read;
3990 
3991 	might_sleep();
3992 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3993 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3994 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3995 	}
3996 
3997 	if (cnt > MAX_KIQ_REG_TRY)
3998 		goto failed_kiq_read;
3999 
4000 	mb();
4001 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4002 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4003 	amdgpu_device_wb_free(adev, reg_val_offs);
4004 	return value;
4005 
4006 failed_undo:
4007 	amdgpu_ring_undo(ring);
4008 failed_unlock:
4009 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4010 failed_kiq_read:
4011 	if (reg_val_offs)
4012 		amdgpu_device_wb_free(adev, reg_val_offs);
4013 	pr_err("failed to read gpu clock\n");
4014 	return ~0;
4015 }
4016 
4017 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4018 {
4019 	uint64_t clock, clock_lo, clock_hi, hi_check;
4020 
4021 	switch (adev->ip_versions[GC_HWIP][0]) {
4022 	case IP_VERSION(9, 3, 0):
4023 		preempt_disable();
4024 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4025 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4026 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4027 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4028 		 * roughly every 42 seconds.
4029 		 */
4030 		if (hi_check != clock_hi) {
4031 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4032 			clock_hi = hi_check;
4033 		}
4034 		preempt_enable();
4035 		clock = clock_lo | (clock_hi << 32ULL);
4036 		break;
4037 	default:
4038 		amdgpu_gfx_off_ctrl(adev, false);
4039 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4040 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4041 			clock = gfx_v9_0_kiq_read_clock(adev);
4042 		} else {
4043 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4044 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4045 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4046 		}
4047 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4048 		amdgpu_gfx_off_ctrl(adev, true);
4049 		break;
4050 	}
4051 	return clock;
4052 }
4053 
4054 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4055 					  uint32_t vmid,
4056 					  uint32_t gds_base, uint32_t gds_size,
4057 					  uint32_t gws_base, uint32_t gws_size,
4058 					  uint32_t oa_base, uint32_t oa_size)
4059 {
4060 	struct amdgpu_device *adev = ring->adev;
4061 
4062 	/* GDS Base */
4063 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4064 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4065 				   gds_base);
4066 
4067 	/* GDS Size */
4068 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4069 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4070 				   gds_size);
4071 
4072 	/* GWS */
4073 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4074 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4075 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4076 
4077 	/* OA */
4078 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4079 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4080 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4081 }
4082 
4083 static const u32 vgpr_init_compute_shader[] =
4084 {
4085 	0xb07c0000, 0xbe8000ff,
4086 	0x000000f8, 0xbf110800,
4087 	0x7e000280, 0x7e020280,
4088 	0x7e040280, 0x7e060280,
4089 	0x7e080280, 0x7e0a0280,
4090 	0x7e0c0280, 0x7e0e0280,
4091 	0x80808800, 0xbe803200,
4092 	0xbf84fff5, 0xbf9c0000,
4093 	0xd28c0001, 0x0001007f,
4094 	0xd28d0001, 0x0002027e,
4095 	0x10020288, 0xb8810904,
4096 	0xb7814000, 0xd1196a01,
4097 	0x00000301, 0xbe800087,
4098 	0xbefc00c1, 0xd89c4000,
4099 	0x00020201, 0xd89cc080,
4100 	0x00040401, 0x320202ff,
4101 	0x00000800, 0x80808100,
4102 	0xbf84fff8, 0x7e020280,
4103 	0xbf810000, 0x00000000,
4104 };
4105 
4106 static const u32 sgpr_init_compute_shader[] =
4107 {
4108 	0xb07c0000, 0xbe8000ff,
4109 	0x0000005f, 0xbee50080,
4110 	0xbe812c65, 0xbe822c65,
4111 	0xbe832c65, 0xbe842c65,
4112 	0xbe852c65, 0xb77c0005,
4113 	0x80808500, 0xbf84fff8,
4114 	0xbe800080, 0xbf810000,
4115 };
4116 
/*
 * Pre-assembled compute shader, stored as raw 32-bit words, that
 * gfx_v9_0_do_edc_gpr_workarounds() selects for the VGPR pass when the GC IP
 * version is 9.4.1.
 *
 * The first 512 words are 256 two-word entries whose first word counts from
 * 0xd3d94000 up to 0xd3d940ff and whose second word is always 0x18000080.
 * NOTE(review): presumably one register-clearing instruction per register
 * index - the opcodes are not decoded here, confirm against the ISA
 * documentation before editing any word.
 */
static const u32 vgpr_init_compute_shader_arcturus[] = {
	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
	0xbf84fff8, 0xbf810000,
};
4211 
/*
 * When the register arrays below are changed, please update gpr_reg_size
 * and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds
 * so that all gfx9 ASICs are still covered.
 */
/*
 * Register/value pairs that gfx_v9_0_do_edc_gpr_workarounds() emits as
 * SET_SH_REG packets ahead of the VGPR-init dispatch on every GC version
 * other than 9.4.1.
 *
 * Only the first (max_shader_engines + 6) entries are emitted: the six
 * common registers followed by one COMPUTE_STATIC_THREAD_MGMT_SEn mask per
 * shader engine, so the SE entries must stay last and in ascending order.
 */
static const struct soc15_reg_entry vgpr_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
4231 
/*
 * Register/value pairs for the VGPR-init dispatch when the GC IP version is
 * 9.4.1; gfx_v9_0_do_edc_gpr_workarounds() picks this table instead of
 * vgpr_init_regs in that case.
 *
 * The layout matches vgpr_init_regs (six common registers, then one
 * COMPUTE_STATIC_THREAD_MGMT_SEn mask per shader engine); the only value
 * that differs is COMPUTE_PGM_RSRC1 (0xbf here, 0x3f there).
 */
static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
4248 
/*
 * Register/value pairs that gfx_v9_0_do_edc_gpr_workarounds() emits as
 * SET_SH_REG packets ahead of the first SGPR-init dispatch ("SGPR1").
 *
 * Only the first (max_shader_engines + 6) entries are emitted.  Every
 * COMPUTE_STATIC_THREAD_MGMT_SEn mask is 0x000000ff (bits 0-7).
 * NOTE(review): presumably this restricts the pass to the first eight CUs of
 * each engine, with sgpr2_init_regs covering the next eight - confirm.
 */
static const struct soc15_reg_entry sgpr1_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
};
4265 
/*
 * Register/value pairs that gfx_v9_0_do_edc_gpr_workarounds() emits as
 * SET_SH_REG packets ahead of the second SGPR-init dispatch ("SGPR2").
 *
 * Identical to sgpr1_init_regs except that every
 * COMPUTE_STATIC_THREAD_MGMT_SEn mask is 0x0000ff00 (bits 8-15) instead of
 * 0x000000ff.  Only the first (max_shader_engines + 6) entries are emitted.
 */
static const struct soc15_reg_entry sgpr2_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
};
4282 
/*
 * Table of the GC EDC counter registers of gfx v9.0.
 *
 * NOTE(review): the three values after SOC15_REG_ENTRY() are presumably
 * reg_value, the number of shader engines and the number of instances to
 * iterate over when reading each counter - confirm against
 * struct soc15_reg_entry and the users of this table, which are outside
 * this part of the file.
 */
static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
};
4318 
4319 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4320 {
4321 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4322 	int i, r;
4323 
4324 	/* only support when RAS is enabled */
4325 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4326 		return 0;
4327 
4328 	r = amdgpu_ring_alloc(ring, 7);
4329 	if (r) {
4330 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4331 			ring->name, r);
4332 		return r;
4333 	}
4334 
4335 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4336 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4337 
4338 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4339 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4340 				PACKET3_DMA_DATA_DST_SEL(1) |
4341 				PACKET3_DMA_DATA_SRC_SEL(2) |
4342 				PACKET3_DMA_DATA_ENGINE(0)));
4343 	amdgpu_ring_write(ring, 0);
4344 	amdgpu_ring_write(ring, 0);
4345 	amdgpu_ring_write(ring, 0);
4346 	amdgpu_ring_write(ring, 0);
4347 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4348 				adev->gds.gds_size);
4349 
4350 	amdgpu_ring_commit(ring);
4351 
4352 	for (i = 0; i < adev->usec_timeout; i++) {
4353 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4354 			break;
4355 		udelay(1);
4356 	}
4357 
4358 	if (i >= adev->usec_timeout)
4359 		r = -ETIMEDOUT;
4360 
4361 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4362 
4363 	return r;
4364 }
4365 
4366 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4367 {
4368 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4369 	struct amdgpu_ib ib;
4370 	struct dma_fence *f = NULL;
4371 	int r, i;
4372 	unsigned total_size, vgpr_offset, sgpr_offset;
4373 	u64 gpu_addr;
4374 
4375 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4376 						adev->gfx.config.max_cu_per_sh *
4377 						adev->gfx.config.max_sh_per_se;
4378 	int sgpr_work_group_size = 5;
4379 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4380 	int vgpr_init_shader_size;
4381 	const u32 *vgpr_init_shader_ptr;
4382 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4383 
4384 	/* only support when RAS is enabled */
4385 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4386 		return 0;
4387 
4388 	/* bail if the compute ring is not ready */
4389 	if (!ring->sched.ready)
4390 		return 0;
4391 
4392 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4393 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4394 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4395 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4396 	} else {
4397 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4398 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4399 		vgpr_init_regs_ptr = vgpr_init_regs;
4400 	}
4401 
4402 	total_size =
4403 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4404 	total_size +=
4405 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4406 	total_size +=
4407 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4408 	total_size = ALIGN(total_size, 256);
4409 	vgpr_offset = total_size;
4410 	total_size += ALIGN(vgpr_init_shader_size, 256);
4411 	sgpr_offset = total_size;
4412 	total_size += sizeof(sgpr_init_compute_shader);
4413 
4414 	/* allocate an indirect buffer to put the commands in */
4415 	memset(&ib, 0, sizeof(ib));
4416 	r = amdgpu_ib_get(adev, NULL, total_size,
4417 					AMDGPU_IB_POOL_DIRECT, &ib);
4418 	if (r) {
4419 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4420 		return r;
4421 	}
4422 
4423 	/* load the compute shaders */
4424 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4425 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4426 
4427 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4428 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4429 
4430 	/* init the ib length to 0 */
4431 	ib.length_dw = 0;
4432 
4433 	/* VGPR */
4434 	/* write the register state for the compute dispatch */
4435 	for (i = 0; i < gpr_reg_size; i++) {
4436 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4437 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4438 								- PACKET3_SET_SH_REG_START;
4439 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4440 	}
4441 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4442 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4443 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4444 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4445 							- PACKET3_SET_SH_REG_START;
4446 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4447 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4448 
4449 	/* write dispatch packet */
4450 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4451 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4452 	ib.ptr[ib.length_dw++] = 1; /* y */
4453 	ib.ptr[ib.length_dw++] = 1; /* z */
4454 	ib.ptr[ib.length_dw++] =
4455 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4456 
4457 	/* write CS partial flush packet */
4458 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4459 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4460 
4461 	/* SGPR1 */
4462 	/* write the register state for the compute dispatch */
4463 	for (i = 0; i < gpr_reg_size; i++) {
4464 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4465 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4466 								- PACKET3_SET_SH_REG_START;
4467 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4468 	}
4469 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4470 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4471 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4472 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4473 							- PACKET3_SET_SH_REG_START;
4474 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4475 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4476 
4477 	/* write dispatch packet */
4478 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4479 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4480 	ib.ptr[ib.length_dw++] = 1; /* y */
4481 	ib.ptr[ib.length_dw++] = 1; /* z */
4482 	ib.ptr[ib.length_dw++] =
4483 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4484 
4485 	/* write CS partial flush packet */
4486 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4487 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4488 
4489 	/* SGPR2 */
4490 	/* write the register state for the compute dispatch */
4491 	for (i = 0; i < gpr_reg_size; i++) {
4492 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4493 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4494 								- PACKET3_SET_SH_REG_START;
4495 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4496 	}
4497 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4498 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4499 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4500 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4501 							- PACKET3_SET_SH_REG_START;
4502 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4503 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4504 
4505 	/* write dispatch packet */
4506 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4507 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4508 	ib.ptr[ib.length_dw++] = 1; /* y */
4509 	ib.ptr[ib.length_dw++] = 1; /* z */
4510 	ib.ptr[ib.length_dw++] =
4511 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4512 
4513 	/* write CS partial flush packet */
4514 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4515 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4516 
4517 	/* shedule the ib on the ring */
4518 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4519 	if (r) {
4520 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4521 		goto fail;
4522 	}
4523 
4524 	/* wait for the GPU to finish processing the IB */
4525 	r = dma_fence_wait(f, false);
4526 	if (r) {
4527 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4528 		goto fail;
4529 	}
4530 
4531 fail:
4532 	amdgpu_ib_free(adev, &ib, NULL);
4533 	dma_fence_put(f);
4534 
4535 	return r;
4536 }
4537 
4538 static int gfx_v9_0_early_init(void *handle)
4539 {
4540 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4541 
4542 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4543 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4544 		adev->gfx.num_gfx_rings = 0;
4545 	else
4546 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4547 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4548 					  AMDGPU_MAX_COMPUTE_RINGS);
4549 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4550 	gfx_v9_0_set_ring_funcs(adev);
4551 	gfx_v9_0_set_irq_funcs(adev);
4552 	gfx_v9_0_set_gds_init(adev);
4553 	gfx_v9_0_set_rlc_funcs(adev);
4554 
4555 	/* init rlcg reg access ctrl */
4556 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4557 
4558 	return 0;
4559 }
4560 
4561 static int gfx_v9_0_ecc_late_init(void *handle)
4562 {
4563 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4564 	int r;
4565 
4566 	/*
4567 	 * Temp workaround to fix the issue that CP firmware fails to
4568 	 * update read pointer when CPDMA is writing clearing operation
4569 	 * to GDS in suspend/resume sequence on several cards. So just
4570 	 * limit this operation in cold boot sequence.
4571 	 */
4572 	if ((!adev->in_suspend) &&
4573 	    (adev->gds.gds_size)) {
4574 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4575 		if (r)
4576 			return r;
4577 	}
4578 
4579 	/* requires IBs so do in late init after IB pool is initialized */
4580 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4581 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4582 	else
4583 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4584 
4585 	if (r)
4586 		return r;
4587 
4588 	if (adev->gfx.ras &&
4589 	    adev->gfx.ras->enable_watchdog_timer)
4590 		adev->gfx.ras->enable_watchdog_timer(adev);
4591 
4592 	return 0;
4593 }
4594 
4595 static int gfx_v9_0_late_init(void *handle)
4596 {
4597 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4598 	int r;
4599 
4600 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4601 	if (r)
4602 		return r;
4603 
4604 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4605 	if (r)
4606 		return r;
4607 
4608 	r = gfx_v9_0_ecc_late_init(handle);
4609 	if (r)
4610 		return r;
4611 
4612 	return 0;
4613 }
4614 
4615 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4616 {
4617 	uint32_t rlc_setting;
4618 
4619 	/* if RLC is not enabled, do nothing */
4620 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4621 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4622 		return false;
4623 
4624 	return true;
4625 }
4626 
4627 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4628 {
4629 	uint32_t data;
4630 	unsigned i;
4631 
4632 	data = RLC_SAFE_MODE__CMD_MASK;
4633 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4634 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4635 
4636 	/* wait for RLC_SAFE_MODE */
4637 	for (i = 0; i < adev->usec_timeout; i++) {
4638 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4639 			break;
4640 		udelay(1);
4641 	}
4642 }
4643 
4644 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4645 {
4646 	uint32_t data;
4647 
4648 	data = RLC_SAFE_MODE__CMD_MASK;
4649 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4650 }
4651 
4652 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4653 						bool enable)
4654 {
4655 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4656 
4657 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4658 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4659 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4660 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4661 	} else {
4662 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4663 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4664 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4665 	}
4666 
4667 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4668 }
4669 
4670 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4671 						bool enable)
4672 {
4673 	/* TODO: double check if we need to perform under safe mode */
4674 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4675 
4676 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4677 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4678 	else
4679 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4680 
4681 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4682 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4683 	else
4684 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4685 
4686 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4687 }
4688 
4689 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4690 						      bool enable)
4691 {
4692 	uint32_t data, def;
4693 
4694 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4695 
4696 	/* It is disabled by HW by default */
4697 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4698 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4699 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4700 
4701 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4702 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4703 
4704 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4705 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4706 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4707 
4708 		/* only for Vega10 & Raven1 */
4709 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4710 
4711 		if (def != data)
4712 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4713 
4714 		/* MGLS is a global flag to control all MGLS in GFX */
4715 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4716 			/* 2 - RLC memory Light sleep */
4717 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4718 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4719 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4720 				if (def != data)
4721 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4722 			}
4723 			/* 3 - CP memory Light sleep */
4724 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4725 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4726 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4727 				if (def != data)
4728 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4729 			}
4730 		}
4731 	} else {
4732 		/* 1 - MGCG_OVERRIDE */
4733 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4734 
4735 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4736 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4737 
4738 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4739 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4740 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4741 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4742 
4743 		if (def != data)
4744 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4745 
4746 		/* 2 - disable MGLS in RLC */
4747 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4748 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4749 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4750 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4751 		}
4752 
4753 		/* 3 - disable MGLS in CP */
4754 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4755 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4756 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4757 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4758 		}
4759 	}
4760 
4761 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4762 }
4763 
4764 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4765 					   bool enable)
4766 {
4767 	uint32_t data, def;
4768 
4769 	if (!adev->gfx.num_gfx_rings)
4770 		return;
4771 
4772 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4773 
4774 	/* Enable 3D CGCG/CGLS */
4775 	if (enable) {
4776 		/* write cmd to clear cgcg/cgls ov */
4777 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4778 		/* unset CGCG override */
4779 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4780 		/* update CGCG and CGLS override bits */
4781 		if (def != data)
4782 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4783 
4784 		/* enable 3Dcgcg FSM(0x0000363f) */
4785 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4786 
4787 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4788 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4789 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4790 		else
4791 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4792 
4793 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4794 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4795 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4796 		if (def != data)
4797 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4798 
4799 		/* set IDLE_POLL_COUNT(0x00900100) */
4800 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4801 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4802 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4803 		if (def != data)
4804 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4805 	} else {
4806 		/* Disable CGCG/CGLS */
4807 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4808 		/* disable cgcg, cgls should be disabled */
4809 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4810 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4811 		/* disable cgcg and cgls in FSM */
4812 		if (def != data)
4813 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4814 	}
4815 
4816 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4817 }
4818 
4819 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4820 						      bool enable)
4821 {
4822 	uint32_t def, data;
4823 
4824 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4825 
4826 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4827 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4828 		/* unset CGCG override */
4829 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4830 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4831 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4832 		else
4833 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4834 		/* update CGCG and CGLS override bits */
4835 		if (def != data)
4836 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4837 
4838 		/* enable cgcg FSM(0x0000363F) */
4839 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4840 
4841 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4842 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4843 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4844 		else
4845 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4846 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4847 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4848 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4849 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4850 		if (def != data)
4851 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4852 
4853 		/* set IDLE_POLL_COUNT(0x00900100) */
4854 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4855 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4856 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4857 		if (def != data)
4858 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4859 	} else {
4860 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4861 		/* reset CGCG/CGLS bits */
4862 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4863 		/* disable cgcg and cgls in FSM */
4864 		if (def != data)
4865 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4866 	}
4867 
4868 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4869 }
4870 
4871 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4872 					    bool enable)
4873 {
4874 	if (enable) {
4875 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4876 		 * ===  MGCG + MGLS ===
4877 		 */
4878 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4879 		/* ===  CGCG /CGLS for GFX 3D Only === */
4880 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4881 		/* ===  CGCG + CGLS === */
4882 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4883 	} else {
4884 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4885 		 * ===  CGCG + CGLS ===
4886 		 */
4887 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4888 		/* ===  CGCG /CGLS for GFX 3D Only === */
4889 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4890 		/* ===  MGCG + MGLS === */
4891 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4892 	}
4893 	return 0;
4894 }
4895 
4896 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4897 {
4898 	u32 reg, data;
4899 
4900 	amdgpu_gfx_off_ctrl(adev, false);
4901 
4902 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4903 	if (amdgpu_sriov_is_pp_one_vf(adev))
4904 		data = RREG32_NO_KIQ(reg);
4905 	else
4906 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4907 
4908 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4909 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4910 
4911 	if (amdgpu_sriov_is_pp_one_vf(adev))
4912 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4913 	else
4914 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4915 
4916 	amdgpu_gfx_off_ctrl(adev, true);
4917 }
4918 
4919 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4920 					uint32_t offset,
4921 					struct soc15_reg_rlcg *entries, int arr_size)
4922 {
4923 	int i;
4924 	uint32_t reg;
4925 
4926 	if (!entries)
4927 		return false;
4928 
4929 	for (i = 0; i < arr_size; i++) {
4930 		const struct soc15_reg_rlcg *entry;
4931 
4932 		entry = &entries[i];
4933 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4934 		if (offset == reg)
4935 			return true;
4936 	}
4937 
4938 	return false;
4939 }
4940 
4941 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4942 {
4943 	return gfx_v9_0_check_rlcg_range(adev, offset,
4944 					(void *)rlcg_access_gc_9_0,
4945 					ARRAY_SIZE(rlcg_access_gc_9_0));
4946 }
4947 
4948 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4949 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4950 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4951 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4952 	.init = gfx_v9_0_rlc_init,
4953 	.get_csb_size = gfx_v9_0_get_csb_size,
4954 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4955 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4956 	.resume = gfx_v9_0_rlc_resume,
4957 	.stop = gfx_v9_0_rlc_stop,
4958 	.reset = gfx_v9_0_rlc_reset,
4959 	.start = gfx_v9_0_rlc_start,
4960 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4961 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4962 };
4963 
4964 static int gfx_v9_0_set_powergating_state(void *handle,
4965 					  enum amd_powergating_state state)
4966 {
4967 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4968 	bool enable = (state == AMD_PG_STATE_GATE);
4969 
4970 	switch (adev->ip_versions[GC_HWIP][0]) {
4971 	case IP_VERSION(9, 2, 2):
4972 	case IP_VERSION(9, 1, 0):
4973 	case IP_VERSION(9, 3, 0):
4974 		if (!enable)
4975 			amdgpu_gfx_off_ctrl(adev, false);
4976 
4977 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4978 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4979 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4980 		} else {
4981 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4982 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4983 		}
4984 
4985 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4986 			gfx_v9_0_enable_cp_power_gating(adev, true);
4987 		else
4988 			gfx_v9_0_enable_cp_power_gating(adev, false);
4989 
4990 		/* update gfx cgpg state */
4991 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4992 
4993 		/* update mgcg state */
4994 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4995 
4996 		if (enable)
4997 			amdgpu_gfx_off_ctrl(adev, true);
4998 		break;
4999 	case IP_VERSION(9, 2, 1):
5000 		amdgpu_gfx_off_ctrl(adev, enable);
5001 		break;
5002 	default:
5003 		break;
5004 	}
5005 
5006 	return 0;
5007 }
5008 
5009 static int gfx_v9_0_set_clockgating_state(void *handle,
5010 					  enum amd_clockgating_state state)
5011 {
5012 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5013 
5014 	if (amdgpu_sriov_vf(adev))
5015 		return 0;
5016 
5017 	switch (adev->ip_versions[GC_HWIP][0]) {
5018 	case IP_VERSION(9, 0, 1):
5019 	case IP_VERSION(9, 2, 1):
5020 	case IP_VERSION(9, 4, 0):
5021 	case IP_VERSION(9, 2, 2):
5022 	case IP_VERSION(9, 1, 0):
5023 	case IP_VERSION(9, 4, 1):
5024 	case IP_VERSION(9, 3, 0):
5025 	case IP_VERSION(9, 4, 2):
5026 		gfx_v9_0_update_gfx_clock_gating(adev,
5027 						 state == AMD_CG_STATE_GATE);
5028 		break;
5029 	default:
5030 		break;
5031 	}
5032 	return 0;
5033 }
5034 
5035 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5036 {
5037 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5038 	int data;
5039 
5040 	if (amdgpu_sriov_vf(adev))
5041 		*flags = 0;
5042 
5043 	/* AMD_CG_SUPPORT_GFX_MGCG */
5044 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5045 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5046 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5047 
5048 	/* AMD_CG_SUPPORT_GFX_CGCG */
5049 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5050 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5051 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5052 
5053 	/* AMD_CG_SUPPORT_GFX_CGLS */
5054 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5055 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5056 
5057 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5058 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5059 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5060 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5061 
5062 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5063 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5064 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5065 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5066 
5067 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5068 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5069 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5070 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5071 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5072 
5073 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5074 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5075 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5076 	}
5077 }
5078 
5079 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5080 {
5081 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5082 }
5083 
5084 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5085 {
5086 	struct amdgpu_device *adev = ring->adev;
5087 	u64 wptr;
5088 
5089 	/* XXX check if swapping is necessary on BE */
5090 	if (ring->use_doorbell) {
5091 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5092 	} else {
5093 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5094 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5095 	}
5096 
5097 	return wptr;
5098 }
5099 
5100 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5101 {
5102 	struct amdgpu_device *adev = ring->adev;
5103 
5104 	if (ring->use_doorbell) {
5105 		/* XXX check if swapping is necessary on BE */
5106 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5107 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5108 	} else {
5109 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5110 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5111 	}
5112 }
5113 
5114 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5115 {
5116 	struct amdgpu_device *adev = ring->adev;
5117 	u32 ref_and_mask, reg_mem_engine;
5118 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5119 
5120 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5121 		switch (ring->me) {
5122 		case 1:
5123 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5124 			break;
5125 		case 2:
5126 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5127 			break;
5128 		default:
5129 			return;
5130 		}
5131 		reg_mem_engine = 0;
5132 	} else {
5133 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5134 		reg_mem_engine = 1; /* pfp */
5135 	}
5136 
5137 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5138 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5139 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5140 			      ref_and_mask, ref_and_mask, 0x20);
5141 }
5142 
5143 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5144 					struct amdgpu_job *job,
5145 					struct amdgpu_ib *ib,
5146 					uint32_t flags)
5147 {
5148 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5149 	u32 header, control = 0;
5150 
5151 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5152 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5153 	else
5154 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5155 
5156 	control |= ib->length_dw | (vmid << 24);
5157 
5158 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5159 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5160 
5161 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5162 			gfx_v9_0_ring_emit_de_meta(ring);
5163 	}
5164 
5165 	amdgpu_ring_write(ring, header);
5166 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5167 	amdgpu_ring_write(ring,
5168 #ifdef __BIG_ENDIAN
5169 		(2 << 0) |
5170 #endif
5171 		lower_32_bits(ib->gpu_addr));
5172 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5173 	amdgpu_ring_write(ring, control);
5174 }
5175 
5176 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5177 					  struct amdgpu_job *job,
5178 					  struct amdgpu_ib *ib,
5179 					  uint32_t flags)
5180 {
5181 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5182 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5183 
5184 	/* Currently, there is a high possibility to get wave ID mismatch
5185 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5186 	 * different wave IDs than the GDS expects. This situation happens
5187 	 * randomly when at least 5 compute pipes use GDS ordered append.
5188 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5189 	 * Those are probably bugs somewhere else in the kernel driver.
5190 	 *
5191 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5192 	 * GDS to 0 for this ring (me/pipe).
5193 	 */
5194 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5195 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5196 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5197 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5198 	}
5199 
5200 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5201 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5202 	amdgpu_ring_write(ring,
5203 #ifdef __BIG_ENDIAN
5204 				(2 << 0) |
5205 #endif
5206 				lower_32_bits(ib->gpu_addr));
5207 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5208 	amdgpu_ring_write(ring, control);
5209 }
5210 
5211 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5212 				     u64 seq, unsigned flags)
5213 {
5214 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5215 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5216 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5217 
5218 	/* RELEASE_MEM - flush caches, send int */
5219 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5220 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5221 					       EOP_TC_NC_ACTION_EN) :
5222 					      (EOP_TCL1_ACTION_EN |
5223 					       EOP_TC_ACTION_EN |
5224 					       EOP_TC_WB_ACTION_EN |
5225 					       EOP_TC_MD_ACTION_EN)) |
5226 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5227 				 EVENT_INDEX(5)));
5228 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5229 
5230 	/*
5231 	 * the address should be Qword aligned if 64bit write, Dword
5232 	 * aligned if only send 32bit data low (discard data high)
5233 	 */
5234 	if (write64bit)
5235 		BUG_ON(addr & 0x7);
5236 	else
5237 		BUG_ON(addr & 0x3);
5238 	amdgpu_ring_write(ring, lower_32_bits(addr));
5239 	amdgpu_ring_write(ring, upper_32_bits(addr));
5240 	amdgpu_ring_write(ring, lower_32_bits(seq));
5241 	amdgpu_ring_write(ring, upper_32_bits(seq));
5242 	amdgpu_ring_write(ring, 0);
5243 }
5244 
5245 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5246 {
5247 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5248 	uint32_t seq = ring->fence_drv.sync_seq;
5249 	uint64_t addr = ring->fence_drv.gpu_addr;
5250 
5251 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5252 			      lower_32_bits(addr), upper_32_bits(addr),
5253 			      seq, 0xffffffff, 4);
5254 }
5255 
5256 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5257 					unsigned vmid, uint64_t pd_addr)
5258 {
5259 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5260 
5261 	/* compute doesn't have PFP */
5262 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5263 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5264 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5265 		amdgpu_ring_write(ring, 0x0);
5266 	}
5267 }
5268 
5269 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5270 {
5271 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5272 }
5273 
5274 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5275 {
5276 	u64 wptr;
5277 
5278 	/* XXX check if swapping is necessary on BE */
5279 	if (ring->use_doorbell)
5280 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5281 	else
5282 		BUG();
5283 	return wptr;
5284 }
5285 
5286 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5287 {
5288 	struct amdgpu_device *adev = ring->adev;
5289 
5290 	/* XXX check if swapping is necessary on BE */
5291 	if (ring->use_doorbell) {
5292 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5293 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5294 	} else{
5295 		BUG(); /* only DOORBELL method supported on gfx9 now */
5296 	}
5297 }
5298 
5299 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5300 					 u64 seq, unsigned int flags)
5301 {
5302 	struct amdgpu_device *adev = ring->adev;
5303 
5304 	/* we only allocate 32bit for each seq wb address */
5305 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5306 
5307 	/* write fence seq to the "addr" */
5308 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5309 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5310 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5311 	amdgpu_ring_write(ring, lower_32_bits(addr));
5312 	amdgpu_ring_write(ring, upper_32_bits(addr));
5313 	amdgpu_ring_write(ring, lower_32_bits(seq));
5314 
5315 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5316 		/* set register to trigger INT */
5317 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5318 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5319 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5320 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5321 		amdgpu_ring_write(ring, 0);
5322 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5323 	}
5324 }
5325 
5326 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5327 {
5328 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5329 	amdgpu_ring_write(ring, 0);
5330 }
5331 
5332 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5333 {
5334 	struct v9_ce_ib_state ce_payload = {0};
5335 	uint64_t csa_addr;
5336 	int cnt;
5337 
5338 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5339 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5340 
5341 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5342 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5343 				 WRITE_DATA_DST_SEL(8) |
5344 				 WR_CONFIRM) |
5345 				 WRITE_DATA_CACHE_POLICY(0));
5346 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5347 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5348 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5349 }
5350 
5351 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5352 {
5353 	struct v9_de_ib_state de_payload = {0};
5354 	uint64_t csa_addr, gds_addr;
5355 	int cnt;
5356 
5357 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5358 	gds_addr = csa_addr + 4096;
5359 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5360 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5361 
5362 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5363 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5364 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5365 				 WRITE_DATA_DST_SEL(8) |
5366 				 WR_CONFIRM) |
5367 				 WRITE_DATA_CACHE_POLICY(0));
5368 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5369 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5370 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5371 }
5372 
5373 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5374 				   bool secure)
5375 {
5376 	uint32_t v = secure ? FRAME_TMZ : 0;
5377 
5378 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5379 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5380 }
5381 
5382 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5383 {
5384 	uint32_t dw2 = 0;
5385 
5386 	if (amdgpu_sriov_vf(ring->adev))
5387 		gfx_v9_0_ring_emit_ce_meta(ring);
5388 
5389 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5390 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5391 		/* set load_global_config & load_global_uconfig */
5392 		dw2 |= 0x8001;
5393 		/* set load_cs_sh_regs */
5394 		dw2 |= 0x01000000;
5395 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5396 		dw2 |= 0x10002;
5397 
5398 		/* set load_ce_ram if preamble presented */
5399 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5400 			dw2 |= 0x10000000;
5401 	} else {
5402 		/* still load_ce_ram if this is the first time preamble presented
5403 		 * although there is no context switch happens.
5404 		 */
5405 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5406 			dw2 |= 0x10000000;
5407 	}
5408 
5409 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5410 	amdgpu_ring_write(ring, dw2);
5411 	amdgpu_ring_write(ring, 0);
5412 }
5413 
5414 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5415 {
5416 	unsigned ret;
5417 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5418 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5419 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5420 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5421 	ret = ring->wptr & ring->buf_mask;
5422 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5423 	return ret;
5424 }
5425 
5426 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5427 {
5428 	unsigned cur;
5429 	BUG_ON(offset > ring->buf_mask);
5430 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5431 
5432 	cur = (ring->wptr - 1) & ring->buf_mask;
5433 	if (likely(cur > offset))
5434 		ring->ring[offset] = cur - offset;
5435 	else
5436 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5437 }
5438 
5439 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5440 				    uint32_t reg_val_offs)
5441 {
5442 	struct amdgpu_device *adev = ring->adev;
5443 
5444 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5445 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5446 				(5 << 8) |	/* dst: memory */
5447 				(1 << 20));	/* write confirm */
5448 	amdgpu_ring_write(ring, reg);
5449 	amdgpu_ring_write(ring, 0);
5450 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5451 				reg_val_offs * 4));
5452 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5453 				reg_val_offs * 4));
5454 }
5455 
5456 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5457 				    uint32_t val)
5458 {
5459 	uint32_t cmd = 0;
5460 
5461 	switch (ring->funcs->type) {
5462 	case AMDGPU_RING_TYPE_GFX:
5463 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5464 		break;
5465 	case AMDGPU_RING_TYPE_KIQ:
5466 		cmd = (1 << 16); /* no inc addr */
5467 		break;
5468 	default:
5469 		cmd = WR_CONFIRM;
5470 		break;
5471 	}
5472 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5473 	amdgpu_ring_write(ring, cmd);
5474 	amdgpu_ring_write(ring, reg);
5475 	amdgpu_ring_write(ring, 0);
5476 	amdgpu_ring_write(ring, val);
5477 }
5478 
5479 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5480 					uint32_t val, uint32_t mask)
5481 {
5482 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5483 }
5484 
5485 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5486 						  uint32_t reg0, uint32_t reg1,
5487 						  uint32_t ref, uint32_t mask)
5488 {
5489 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5490 	struct amdgpu_device *adev = ring->adev;
5491 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5492 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5493 
5494 	if (fw_version_ok)
5495 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5496 				      ref, mask, 0x20);
5497 	else
5498 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5499 							   ref, mask);
5500 }
5501 
5502 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5503 {
5504 	struct amdgpu_device *adev = ring->adev;
5505 	uint32_t value = 0;
5506 
5507 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5508 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5509 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5510 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5511 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5512 }
5513 
5514 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5515 						 enum amdgpu_interrupt_state state)
5516 {
5517 	switch (state) {
5518 	case AMDGPU_IRQ_STATE_DISABLE:
5519 	case AMDGPU_IRQ_STATE_ENABLE:
5520 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5521 			       TIME_STAMP_INT_ENABLE,
5522 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5523 		break;
5524 	default:
5525 		break;
5526 	}
5527 }
5528 
5529 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5530 						     int me, int pipe,
5531 						     enum amdgpu_interrupt_state state)
5532 {
5533 	u32 mec_int_cntl, mec_int_cntl_reg;
5534 
5535 	/*
5536 	 * amdgpu controls only the first MEC. That's why this function only
5537 	 * handles the setting of interrupts for this specific MEC. All other
5538 	 * pipes' interrupts are set by amdkfd.
5539 	 */
5540 
5541 	if (me == 1) {
5542 		switch (pipe) {
5543 		case 0:
5544 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5545 			break;
5546 		case 1:
5547 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5548 			break;
5549 		case 2:
5550 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5551 			break;
5552 		case 3:
5553 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5554 			break;
5555 		default:
5556 			DRM_DEBUG("invalid pipe %d\n", pipe);
5557 			return;
5558 		}
5559 	} else {
5560 		DRM_DEBUG("invalid me %d\n", me);
5561 		return;
5562 	}
5563 
5564 	switch (state) {
5565 	case AMDGPU_IRQ_STATE_DISABLE:
5566 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
5567 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5568 					     TIME_STAMP_INT_ENABLE, 0);
5569 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5570 		break;
5571 	case AMDGPU_IRQ_STATE_ENABLE:
5572 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5573 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5574 					     TIME_STAMP_INT_ENABLE, 1);
5575 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5576 		break;
5577 	default:
5578 		break;
5579 	}
5580 }
5581 
5582 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5583 					     struct amdgpu_irq_src *source,
5584 					     unsigned type,
5585 					     enum amdgpu_interrupt_state state)
5586 {
5587 	switch (state) {
5588 	case AMDGPU_IRQ_STATE_DISABLE:
5589 	case AMDGPU_IRQ_STATE_ENABLE:
5590 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5591 			       PRIV_REG_INT_ENABLE,
5592 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5593 		break;
5594 	default:
5595 		break;
5596 	}
5597 
5598 	return 0;
5599 }
5600 
5601 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5602 					      struct amdgpu_irq_src *source,
5603 					      unsigned type,
5604 					      enum amdgpu_interrupt_state state)
5605 {
5606 	switch (state) {
5607 	case AMDGPU_IRQ_STATE_DISABLE:
5608 	case AMDGPU_IRQ_STATE_ENABLE:
5609 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5610 			       PRIV_INSTR_INT_ENABLE,
5611 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5612 		break;
5613 	default:
5614 		break;
5615 	}
5616 
5617 	return 0;
5618 }
5619 
/* Set CP_ECC_ERROR_INT_ENABLE in CP_ME<m>_PIPE<p>_INT_CNTL. */
#define ENABLE_ECC_ON_ME_PIPE(m, p)				\
	WREG32_FIELD15(GC, 0, CP_ME##m##_PIPE##p##_INT_CNTL,	\
		       CP_ECC_ERROR_INT_ENABLE, 1)

/* Clear CP_ECC_ERROR_INT_ENABLE in CP_ME<m>_PIPE<p>_INT_CNTL. */
#define DISABLE_ECC_ON_ME_PIPE(m, p)				\
	WREG32_FIELD15(GC, 0, CP_ME##m##_PIPE##p##_INT_CNTL,	\
		       CP_ECC_ERROR_INT_ENABLE, 0)
5627 
5628 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5629 					      struct amdgpu_irq_src *source,
5630 					      unsigned type,
5631 					      enum amdgpu_interrupt_state state)
5632 {
5633 	switch (state) {
5634 	case AMDGPU_IRQ_STATE_DISABLE:
5635 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5636 				CP_ECC_ERROR_INT_ENABLE, 0);
5637 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5638 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5639 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5640 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5641 		break;
5642 
5643 	case AMDGPU_IRQ_STATE_ENABLE:
5644 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5645 				CP_ECC_ERROR_INT_ENABLE, 1);
5646 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5647 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5648 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5649 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5650 		break;
5651 	default:
5652 		break;
5653 	}
5654 
5655 	return 0;
5656 }
5657 
5658 
5659 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5660 					    struct amdgpu_irq_src *src,
5661 					    unsigned type,
5662 					    enum amdgpu_interrupt_state state)
5663 {
5664 	switch (type) {
5665 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5666 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5667 		break;
5668 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5669 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5670 		break;
5671 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5672 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5673 		break;
5674 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5675 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5676 		break;
5677 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5678 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5679 		break;
5680 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5681 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5682 		break;
5683 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5684 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5685 		break;
5686 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5687 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5688 		break;
5689 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5690 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5691 		break;
5692 	default:
5693 		break;
5694 	}
5695 	return 0;
5696 }
5697 
5698 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5699 			    struct amdgpu_irq_src *source,
5700 			    struct amdgpu_iv_entry *entry)
5701 {
5702 	int i;
5703 	u8 me_id, pipe_id, queue_id;
5704 	struct amdgpu_ring *ring;
5705 
5706 	DRM_DEBUG("IH: CP EOP\n");
5707 	me_id = (entry->ring_id & 0x0c) >> 2;
5708 	pipe_id = (entry->ring_id & 0x03) >> 0;
5709 	queue_id = (entry->ring_id & 0x70) >> 4;
5710 
5711 	switch (me_id) {
5712 	case 0:
5713 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5714 		break;
5715 	case 1:
5716 	case 2:
5717 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5718 			ring = &adev->gfx.compute_ring[i];
5719 			/* Per-queue interrupt is supported for MEC starting from VI.
5720 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5721 			  */
5722 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5723 				amdgpu_fence_process(ring);
5724 		}
5725 		break;
5726 	}
5727 	return 0;
5728 }
5729 
5730 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5731 			   struct amdgpu_iv_entry *entry)
5732 {
5733 	u8 me_id, pipe_id, queue_id;
5734 	struct amdgpu_ring *ring;
5735 	int i;
5736 
5737 	me_id = (entry->ring_id & 0x0c) >> 2;
5738 	pipe_id = (entry->ring_id & 0x03) >> 0;
5739 	queue_id = (entry->ring_id & 0x70) >> 4;
5740 
5741 	switch (me_id) {
5742 	case 0:
5743 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5744 		break;
5745 	case 1:
5746 	case 2:
5747 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5748 			ring = &adev->gfx.compute_ring[i];
5749 			if (ring->me == me_id && ring->pipe == pipe_id &&
5750 			    ring->queue == queue_id)
5751 				drm_sched_fault(&ring->sched);
5752 		}
5753 		break;
5754 	}
5755 }
5756 
/*
 * Privileged register fault interrupt handler: logs the error and hands
 * the entry to gfx_v9_0_fault(). @source is unused. Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	/* flag the offending ring's scheduler */
	gfx_v9_0_fault(adev, entry);

	return 0;
}
5765 
/**
 * gfx_v9_0_priv_inst_irq - handle a privileged-instruction interrupt
 * @adev: amdgpu device the interrupt belongs to
 * @source: interrupt source (not used by this handler)
 * @entry: interrupt vector entry identifying the ring
 *
 * Logs the illegal instruction and forwards the entry to gfx_v9_0_fault()
 * so the matching ring's scheduler is notified.
 *
 * Return: always 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5774 
5775 
/*
 * GFX v9.0 RAS EDC counter field table.
 *
 * Each entry lists, in order:
 *   - the sub-block name used in log messages,
 *   - the EDC counter register holding the counters (SOC15_REG_ENTRY),
 *   - the SEC (or SED) counter field of that register (SOC15_REG_FIELD),
 *   - the DED counter field of that register (SOC15_REG_FIELD), or "0, 0"
 *     when the entry has no DED field (presumably a zero mask and shift, so
 *     the extracted DED count is always 0 -- confirm against the macro).
 *
 * gfx_v9_0_ras_error_count() walks this table, selects the entries whose
 * reg_offset/seg/inst match the register being decoded, and extracts the
 * counts via sec_count_mask/sec_count_shift and
 * ded_count_mask/ded_count_shift.
 */
5776 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5777 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5778 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5779 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5780 	},
5781 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5782 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5783 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5784 	},
5785 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5786 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5787 	  0, 0
5788 	},
5789 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5790 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5791 	  0, 0
5792 	},
5793 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5794 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5795 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5796 	},
5797 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5798 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5799 	  0, 0
5800 	},
5801 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5802 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5803 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5804 	},
5805 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5806 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5807 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5808 	},
5809 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5810 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5811 	  0, 0
5812 	},
5813 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5814 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5815 	  0, 0
5816 	},
5817 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5818 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5819 	  0, 0
5820 	},
5821 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5822 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5823 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5824 	},
5825 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5826 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5827 	  0, 0
5828 	},
5829 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5830 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5831 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5832 	},
5833 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5834 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5835 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5836 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5837 	},
5838 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5839 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5840 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5841 	  0, 0
5842 	},
5843 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5844 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5845 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5846 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5847 	},
5848 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5849 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5850 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5851 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5852 	},
5853 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5854 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5855 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5856 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5857 	},
5858 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5859 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5860 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5861 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5862 	},
5863 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5864 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5865 	  0, 0
5866 	},
5867 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5868 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5869 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5870 	},
5871 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5872 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5873 	  0, 0
5874 	},
5875 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5876 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5877 	  0, 0
5878 	},
5879 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5880 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5881 	  0, 0
5882 	},
5883 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5884 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5885 	  0, 0
5886 	},
5887 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5888 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5889 	  0, 0
5890 	},
5891 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5892 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5893 	  0, 0
5894 	},
5895 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5896 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5897 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5898 	},
5899 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5900 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5901 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5902 	},
5903 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5904 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5905 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5906 	},
5907 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5908 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5909 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5910 	},
5911 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5912 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5913 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5914 	},
5915 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5916 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5917 	  0, 0
5918 	},
5919 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5920 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5921 	  0, 0
5922 	},
5923 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5924 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5925 	  0, 0
5926 	},
5927 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5928 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5929 	  0, 0
5930 	},
5931 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5932 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5933 	  0, 0
5934 	},
5935 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5936 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5937 	  0, 0
5938 	},
5939 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5940 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5941 	  0, 0
5942 	},
5943 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5944 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5945 	  0, 0
5946 	},
5947 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5948 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5949 	  0, 0
5950 	},
5951 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5952 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5953 	  0, 0
5954 	},
5955 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5956 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5957 	  0, 0
5958 	},
5959 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5960 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5961 	  0, 0
5962 	},
5963 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5964 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5965 	  0, 0
5966 	},
5967 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5968 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5969 	  0, 0
5970 	},
5971 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5972 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5973 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5974 	},
5975 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5976 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5977 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5978 	},
5979 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5980 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5981 	  0, 0
5982 	},
5983 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5984 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5985 	  0, 0
5986 	},
5987 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5988 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5989 	  0, 0
5990 	},
5991 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5992 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5993 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5994 	},
5995 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5996 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5997 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5998 	},
5999 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6000 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6001 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6002 	},
6003 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6004 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6005 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6006 	},
6007 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6008 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6009 	  0, 0
6010 	},
6011 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6012 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6013 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6014 	},
6015 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6016 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6017 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6018 	},
6019 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6020 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6021 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6022 	},
6023 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6024 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6025 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6026 	},
6027 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6028 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6029 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6030 	},
6031 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6032 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6033 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6034 	},
6035 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6036 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6037 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6038 	},
6039 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6040 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6041 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6042 	},
6043 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6044 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6045 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6046 	},
6047 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6048 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6049 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6050 	},
6051 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6052 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6053 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6054 	},
6055 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6056 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6057 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6058 	},
6059 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6060 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6061 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6062 	},
6063 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6064 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6065 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6066 	},
6067 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6068 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6069 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6070 	},
6071 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6072 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6073 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6074 	},
6075 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6076 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6077 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6078 	},
6079 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6080 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6081 	  0, 0
6082 	},
6083 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6084 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6085 	  0, 0
6086 	},
6087 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6088 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6089 	  0, 0
6090 	},
6091 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6092 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6096 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6097 	  0, 0
6098 	},
6099 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6100 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6101 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6102 	},
6103 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6104 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6105 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6106 	},
6107 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6108 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6109 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6110 	},
6111 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6112 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6113 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6114 	},
6115 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6116 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6117 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6118 	},
6119 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6120 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6121 	  0, 0
6122 	},
6123 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6124 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6125 	  0, 0
6126 	},
6127 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6128 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6129 	  0, 0
6130 	},
6131 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6132 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6133 	  0, 0
6134 	},
6135 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6136 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6137 	  0, 0
6138 	},
6139 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6140 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6141 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6142 	},
6143 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6144 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6145 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6146 	},
6147 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6148 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6149 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6150 	},
6151 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6152 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6153 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6154 	},
6155 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6156 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6157 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6158 	},
6159 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6160 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6161 	  0, 0
6162 	},
6163 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6164 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6165 	  0, 0
6166 	},
6167 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6168 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6169 	  0, 0
6170 	},
6171 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6172 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6173 	  0, 0
6174 	},
6175 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6176 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6177 	  0, 0
6178 	},
6179 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6180 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6181 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6182 	},
6183 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6184 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6185 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6186 	},
6187 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6188 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6189 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6190 	},
6191 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6192 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6193 	  0, 0
6194 	},
6195 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6196 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6197 	  0, 0
6198 	},
6199 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6200 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6201 	  0, 0
6202 	},
6203 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6204 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6205 	  0, 0
6206 	},
6207 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6208 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6209 	  0, 0
6210 	},
6211 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6212 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6213 	  0, 0
6214 	}
6215 };
6216 
6217 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6218 				     void *inject_if)
6219 {
6220 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6221 	int ret;
6222 	struct ta_ras_trigger_error_input block_info = { 0 };
6223 
6224 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6225 		return -EINVAL;
6226 
6227 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6228 		return -EINVAL;
6229 
6230 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6231 		return -EPERM;
6232 
6233 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6234 	      info->head.type)) {
6235 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6236 			ras_gfx_subblocks[info->head.sub_block_index].name,
6237 			info->head.type);
6238 		return -EPERM;
6239 	}
6240 
6241 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6242 	      info->head.type)) {
6243 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6244 			ras_gfx_subblocks[info->head.sub_block_index].name,
6245 			info->head.type);
6246 		return -EPERM;
6247 	}
6248 
6249 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6250 	block_info.sub_block_index =
6251 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6252 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6253 	block_info.address = info->address;
6254 	block_info.value = info->value;
6255 
6256 	mutex_lock(&adev->grbm_idx_mutex);
6257 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6258 	mutex_unlock(&adev->grbm_idx_mutex);
6259 
6260 	return ret;
6261 }
6262 
/*
 * Names of the VM L2 memories, used in RAS log messages.  The array position
 * is the value written to mmVM_L2_MEM_ECC_INDEX before that memory's ECC
 * counter is read.  Both the strings and the pointer array are constant.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6281 
/*
 * Names of the VM L2 walker memories, used in RAS log messages.  The array
 * position is the value written to mmVM_L2_WALKER_MEM_ECC_INDEX before that
 * memory's ECC counter is read.  Both the strings and the pointer array are
 * constant.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6291 
/*
 * Names of the ATC L2 2M cache memories, used in RAS log messages.  The array
 * position is the value written to mmATC_L2_CACHE_2M_EDC_INDEX before that
 * memory's EDC counter is read.  Both the strings and the pointer array are
 * constant.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6298 
/*
 * Names of the ATC L2 4K cache memories, used in RAS log messages.  The array
 * position is the value written to mmATC_L2_CACHE_4K_EDC_INDEX before that
 * memory's EDC counter is read.  Both the strings and the pointer array are
 * constant.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6333 
6334 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6335 					 struct ras_err_data *err_data)
6336 {
6337 	uint32_t i, data;
6338 	uint32_t sec_count, ded_count;
6339 
6340 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6341 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6342 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6343 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6344 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6345 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6346 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6347 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6348 
6349 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6350 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6351 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6352 
6353 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6354 		if (sec_count) {
6355 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6356 				"SEC %d\n", i, vml2_mems[i], sec_count);
6357 			err_data->ce_count += sec_count;
6358 		}
6359 
6360 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6361 		if (ded_count) {
6362 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6363 				"DED %d\n", i, vml2_mems[i], ded_count);
6364 			err_data->ue_count += ded_count;
6365 		}
6366 	}
6367 
6368 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6369 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6370 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6371 
6372 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6373 						SEC_COUNT);
6374 		if (sec_count) {
6375 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6376 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6377 			err_data->ce_count += sec_count;
6378 		}
6379 
6380 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6381 						DED_COUNT);
6382 		if (ded_count) {
6383 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6384 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6385 			err_data->ue_count += ded_count;
6386 		}
6387 	}
6388 
6389 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6390 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6391 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6392 
6393 		sec_count = (data & 0x00006000L) >> 0xd;
6394 		if (sec_count) {
6395 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6396 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6397 				sec_count);
6398 			err_data->ce_count += sec_count;
6399 		}
6400 	}
6401 
6402 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6403 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6404 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6405 
6406 		sec_count = (data & 0x00006000L) >> 0xd;
6407 		if (sec_count) {
6408 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6409 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6410 				sec_count);
6411 			err_data->ce_count += sec_count;
6412 		}
6413 
6414 		ded_count = (data & 0x00018000L) >> 0xf;
6415 		if (ded_count) {
6416 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6417 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6418 				ded_count);
6419 			err_data->ue_count += ded_count;
6420 		}
6421 	}
6422 
6423 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6424 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6425 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6426 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6427 
6428 	return 0;
6429 }
6430 
6431 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6432 	const struct soc15_reg_entry *reg,
6433 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6434 	uint32_t *sec_count, uint32_t *ded_count)
6435 {
6436 	uint32_t i;
6437 	uint32_t sec_cnt, ded_cnt;
6438 
6439 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6440 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6441 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6442 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6443 			continue;
6444 
6445 		sec_cnt = (value &
6446 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6447 				gfx_v9_0_ras_fields[i].sec_count_shift;
6448 		if (sec_cnt) {
6449 			dev_info(adev->dev, "GFX SubBlock %s, "
6450 				"Instance[%d][%d], SEC %d\n",
6451 				gfx_v9_0_ras_fields[i].name,
6452 				se_id, inst_id,
6453 				sec_cnt);
6454 			*sec_count += sec_cnt;
6455 		}
6456 
6457 		ded_cnt = (value &
6458 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6459 				gfx_v9_0_ras_fields[i].ded_count_shift;
6460 		if (ded_cnt) {
6461 			dev_info(adev->dev, "GFX SubBlock %s, "
6462 				"Instance[%d][%d], DED %d\n",
6463 				gfx_v9_0_ras_fields[i].name,
6464 				se_id, inst_id,
6465 				ded_cnt);
6466 			*ded_count += ded_cnt;
6467 		}
6468 	}
6469 
6470 	return 0;
6471 }
6472 
6473 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6474 {
6475 	int i, j, k;
6476 
6477 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6478 		return;
6479 
6480 	/* read back registers to clear the counters */
6481 	mutex_lock(&adev->grbm_idx_mutex);
6482 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6483 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6484 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6485 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6486 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6487 			}
6488 		}
6489 	}
6490 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6491 	mutex_unlock(&adev->grbm_idx_mutex);
6492 
6493 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6494 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6495 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6496 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6497 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6498 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6499 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6500 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6501 
6502 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6503 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6504 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6505 	}
6506 
6507 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6508 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6509 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6510 	}
6511 
6512 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6513 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6514 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6515 	}
6516 
6517 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6518 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6519 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6520 	}
6521 
6522 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6523 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6524 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6525 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6526 }
6527 
6528 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6529 					  void *ras_error_status)
6530 {
6531 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6532 	uint32_t sec_count = 0, ded_count = 0;
6533 	uint32_t i, j, k;
6534 	uint32_t reg_value;
6535 
6536 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6537 		return;
6538 
6539 	err_data->ue_count = 0;
6540 	err_data->ce_count = 0;
6541 
6542 	mutex_lock(&adev->grbm_idx_mutex);
6543 
6544 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6545 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6546 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6547 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6548 				reg_value =
6549 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6550 				if (reg_value)
6551 					gfx_v9_0_ras_error_count(adev,
6552 						&gfx_v9_0_edc_counter_regs[i],
6553 						j, k, reg_value,
6554 						&sec_count, &ded_count);
6555 			}
6556 		}
6557 	}
6558 
6559 	err_data->ce_count += sec_count;
6560 	err_data->ue_count += ded_count;
6561 
6562 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6563 	mutex_unlock(&adev->grbm_idx_mutex);
6564 
6565 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6566 }
6567 
6568 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6569 {
6570 	const unsigned int cp_coher_cntl =
6571 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6572 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6573 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6574 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6575 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6576 
6577 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6578 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6579 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6580 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6581 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6582 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6583 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6584 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6585 }
6586 
6587 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6588 					uint32_t pipe, bool enable)
6589 {
6590 	struct amdgpu_device *adev = ring->adev;
6591 	uint32_t val;
6592 	uint32_t wcl_cs_reg;
6593 
6594 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6595 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6596 
6597 	switch (pipe) {
6598 	case 0:
6599 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6600 		break;
6601 	case 1:
6602 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6603 		break;
6604 	case 2:
6605 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6606 		break;
6607 	case 3:
6608 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6609 		break;
6610 	default:
6611 		DRM_DEBUG("invalid pipe %d\n", pipe);
6612 		return;
6613 	}
6614 
6615 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6616 
6617 }
6618 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6619 {
6620 	struct amdgpu_device *adev = ring->adev;
6621 	uint32_t val;
6622 	int i;
6623 
6624 
6625 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6626 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6627 	 * around 25% of gpu resources.
6628 	 */
6629 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6630 	amdgpu_ring_emit_wreg(ring,
6631 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6632 			      val);
6633 
6634 	/* Restrict waves for normal/low priority compute queues as well
6635 	 * to get best QoS for high priority compute jobs.
6636 	 *
6637 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6638 	 */
6639 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6640 		if (i != ring->pipe)
6641 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6642 
6643 	}
6644 }
6645 
6646 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6647 	.name = "gfx_v9_0",
6648 	.early_init = gfx_v9_0_early_init,
6649 	.late_init = gfx_v9_0_late_init,
6650 	.sw_init = gfx_v9_0_sw_init,
6651 	.sw_fini = gfx_v9_0_sw_fini,
6652 	.hw_init = gfx_v9_0_hw_init,
6653 	.hw_fini = gfx_v9_0_hw_fini,
6654 	.suspend = gfx_v9_0_suspend,
6655 	.resume = gfx_v9_0_resume,
6656 	.is_idle = gfx_v9_0_is_idle,
6657 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6658 	.soft_reset = gfx_v9_0_soft_reset,
6659 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6660 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6661 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6662 };
6663 
6664 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6665 	.type = AMDGPU_RING_TYPE_GFX,
6666 	.align_mask = 0xff,
6667 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6668 	.support_64bit_ptrs = true,
6669 	.secure_submission_supported = true,
6670 	.vmhub = AMDGPU_GFXHUB_0,
6671 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6672 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6673 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6674 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6675 		5 +  /* COND_EXEC */
6676 		7 +  /* PIPELINE_SYNC */
6677 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6678 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6679 		2 + /* VM_FLUSH */
6680 		8 +  /* FENCE for VM_FLUSH */
6681 		20 + /* GDS switch */
6682 		4 + /* double SWITCH_BUFFER,
6683 		       the first COND_EXEC jump to the place just
6684 			   prior to this double SWITCH_BUFFER  */
6685 		5 + /* COND_EXEC */
6686 		7 +	 /*	HDP_flush */
6687 		4 +	 /*	VGT_flush */
6688 		14 + /*	CE_META */
6689 		31 + /*	DE_META */
6690 		3 + /* CNTX_CTRL */
6691 		5 + /* HDP_INVL */
6692 		8 + 8 + /* FENCE x2 */
6693 		2 + /* SWITCH_BUFFER */
6694 		7, /* gfx_v9_0_emit_mem_sync */
6695 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6696 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6697 	.emit_fence = gfx_v9_0_ring_emit_fence,
6698 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6699 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6700 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6701 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6702 	.test_ring = gfx_v9_0_ring_test_ring,
6703 	.test_ib = gfx_v9_0_ring_test_ib,
6704 	.insert_nop = amdgpu_ring_insert_nop,
6705 	.pad_ib = amdgpu_ring_generic_pad_ib,
6706 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6707 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6708 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6709 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6710 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6711 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6712 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6713 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6714 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6715 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6716 };
6717 
6718 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6719 	.type = AMDGPU_RING_TYPE_COMPUTE,
6720 	.align_mask = 0xff,
6721 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6722 	.support_64bit_ptrs = true,
6723 	.vmhub = AMDGPU_GFXHUB_0,
6724 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6725 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6726 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6727 	.emit_frame_size =
6728 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6729 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6730 		5 + /* hdp invalidate */
6731 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6732 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6733 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6734 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6735 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6736 		7 + /* gfx_v9_0_emit_mem_sync */
6737 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6738 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6739 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6740 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6741 	.emit_fence = gfx_v9_0_ring_emit_fence,
6742 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6743 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6744 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6745 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6746 	.test_ring = gfx_v9_0_ring_test_ring,
6747 	.test_ib = gfx_v9_0_ring_test_ib,
6748 	.insert_nop = amdgpu_ring_insert_nop,
6749 	.pad_ib = amdgpu_ring_generic_pad_ib,
6750 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6751 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6752 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6753 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6754 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6755 };
6756 
6757 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6758 	.type = AMDGPU_RING_TYPE_KIQ,
6759 	.align_mask = 0xff,
6760 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6761 	.support_64bit_ptrs = true,
6762 	.vmhub = AMDGPU_GFXHUB_0,
6763 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6764 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6765 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6766 	.emit_frame_size =
6767 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6768 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6769 		5 + /* hdp invalidate */
6770 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6771 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6772 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6773 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6774 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6775 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6776 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6777 	.test_ring = gfx_v9_0_ring_test_ring,
6778 	.insert_nop = amdgpu_ring_insert_nop,
6779 	.pad_ib = amdgpu_ring_generic_pad_ib,
6780 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6781 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6782 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6783 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6784 };
6785 
6786 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6787 {
6788 	int i;
6789 
6790 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6791 
6792 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6793 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6794 
6795 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6796 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6797 }
6798 
6799 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6800 	.set = gfx_v9_0_set_eop_interrupt_state,
6801 	.process = gfx_v9_0_eop_irq,
6802 };
6803 
6804 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6805 	.set = gfx_v9_0_set_priv_reg_fault_state,
6806 	.process = gfx_v9_0_priv_reg_irq,
6807 };
6808 
6809 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6810 	.set = gfx_v9_0_set_priv_inst_fault_state,
6811 	.process = gfx_v9_0_priv_inst_irq,
6812 };
6813 
6814 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6815 	.set = gfx_v9_0_set_cp_ecc_error_state,
6816 	.process = amdgpu_gfx_cp_ecc_error_irq,
6817 };
6818 
6819 
6820 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6821 {
6822 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6823 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6824 
6825 	adev->gfx.priv_reg_irq.num_types = 1;
6826 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6827 
6828 	adev->gfx.priv_inst_irq.num_types = 1;
6829 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6830 
6831 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6832 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6833 }
6834 
6835 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6836 {
6837 	switch (adev->ip_versions[GC_HWIP][0]) {
6838 	case IP_VERSION(9, 0, 1):
6839 	case IP_VERSION(9, 2, 1):
6840 	case IP_VERSION(9, 4, 0):
6841 	case IP_VERSION(9, 2, 2):
6842 	case IP_VERSION(9, 1, 0):
6843 	case IP_VERSION(9, 4, 1):
6844 	case IP_VERSION(9, 3, 0):
6845 	case IP_VERSION(9, 4, 2):
6846 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6847 		break;
6848 	default:
6849 		break;
6850 	}
6851 }
6852 
6853 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6854 {
6855 	/* init asci gds info */
6856 	switch (adev->ip_versions[GC_HWIP][0]) {
6857 	case IP_VERSION(9, 0, 1):
6858 	case IP_VERSION(9, 2, 1):
6859 	case IP_VERSION(9, 4, 0):
6860 		adev->gds.gds_size = 0x10000;
6861 		break;
6862 	case IP_VERSION(9, 2, 2):
6863 	case IP_VERSION(9, 1, 0):
6864 	case IP_VERSION(9, 4, 1):
6865 		adev->gds.gds_size = 0x1000;
6866 		break;
6867 	case IP_VERSION(9, 4, 2):
6868 		/* aldebaran removed all the GDS internal memory,
6869 		 * only support GWS opcode in kernel, like barrier
6870 		 * semaphore.etc */
6871 		adev->gds.gds_size = 0;
6872 		break;
6873 	default:
6874 		adev->gds.gds_size = 0x10000;
6875 		break;
6876 	}
6877 
6878 	switch (adev->ip_versions[GC_HWIP][0]) {
6879 	case IP_VERSION(9, 0, 1):
6880 	case IP_VERSION(9, 4, 0):
6881 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6882 		break;
6883 	case IP_VERSION(9, 2, 1):
6884 		adev->gds.gds_compute_max_wave_id = 0x27f;
6885 		break;
6886 	case IP_VERSION(9, 2, 2):
6887 	case IP_VERSION(9, 1, 0):
6888 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
6889 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6890 		else
6891 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6892 		break;
6893 	case IP_VERSION(9, 4, 1):
6894 		adev->gds.gds_compute_max_wave_id = 0xfff;
6895 		break;
6896 	case IP_VERSION(9, 4, 2):
6897 		/* deprecated for Aldebaran, no usage at all */
6898 		adev->gds.gds_compute_max_wave_id = 0;
6899 		break;
6900 	default:
6901 		/* this really depends on the chip */
6902 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6903 		break;
6904 	}
6905 
6906 	adev->gds.gws_size = 64;
6907 	adev->gds.oa_size = 16;
6908 }
6909 
6910 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6911 						 u32 bitmap)
6912 {
6913 	u32 data;
6914 
6915 	if (!bitmap)
6916 		return;
6917 
6918 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6919 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6920 
6921 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6922 }
6923 
6924 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6925 {
6926 	u32 data, mask;
6927 
6928 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6929 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6930 
6931 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6932 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6933 
6934 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6935 
6936 	return (~data) & mask;
6937 }
6938 
6939 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6940 				 struct amdgpu_cu_info *cu_info)
6941 {
6942 	int i, j, k, counter, active_cu_number = 0;
6943 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6944 	unsigned disable_masks[4 * 4];
6945 
6946 	if (!adev || !cu_info)
6947 		return -EINVAL;
6948 
6949 	/*
6950 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6951 	 */
6952 	if (adev->gfx.config.max_shader_engines *
6953 		adev->gfx.config.max_sh_per_se > 16)
6954 		return -EINVAL;
6955 
6956 	amdgpu_gfx_parse_disable_cu(disable_masks,
6957 				    adev->gfx.config.max_shader_engines,
6958 				    adev->gfx.config.max_sh_per_se);
6959 
6960 	mutex_lock(&adev->grbm_idx_mutex);
6961 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6962 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6963 			mask = 1;
6964 			ao_bitmap = 0;
6965 			counter = 0;
6966 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6967 			gfx_v9_0_set_user_cu_inactive_bitmap(
6968 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6969 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6970 
6971 			/*
6972 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6973 			 * 4x4 size array, and it's usually suitable for Vega
6974 			 * ASICs which has 4*2 SE/SH layout.
6975 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6976 			 * To mostly reduce the impact, we make it compatible
6977 			 * with current bitmap array as below:
6978 			 *    SE4,SH0 --> bitmap[0][1]
6979 			 *    SE5,SH0 --> bitmap[1][1]
6980 			 *    SE6,SH0 --> bitmap[2][1]
6981 			 *    SE7,SH0 --> bitmap[3][1]
6982 			 */
6983 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6984 
6985 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6986 				if (bitmap & mask) {
6987 					if (counter < adev->gfx.config.max_cu_per_sh)
6988 						ao_bitmap |= mask;
6989 					counter ++;
6990 				}
6991 				mask <<= 1;
6992 			}
6993 			active_cu_number += counter;
6994 			if (i < 2 && j < 2)
6995 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6996 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6997 		}
6998 	}
6999 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7000 	mutex_unlock(&adev->grbm_idx_mutex);
7001 
7002 	cu_info->number = active_cu_number;
7003 	cu_info->ao_cu_mask = ao_cu_mask;
7004 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7005 
7006 	return 0;
7007 }
7008 
7009 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7010 {
7011 	.type = AMD_IP_BLOCK_TYPE_GFX,
7012 	.major = 9,
7013 	.minor = 0,
7014 	.rev = 0,
7015 	.funcs = &gfx_v9_0_ip_funcs,
7016 };
7017