xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision e93e075d340859af772214c267d27f09f9db3e51)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
/*
 * File-local sizing and address constants for the GFX9 block.
 * Their users lie outside this part of the file.
 */
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
/*
 * Offset / base-index pair for the GCEA_PROBE_MAP register, defined locally
 * in this file; it is referenced by the golden_settings_gc_9_1_rn table.
 */
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
/*
 * Firmware images named by this file, one group per ASIC.  A full group
 * lists the ce, pfp, me, mec, mec2 and rlc images; some groups below list
 * only a subset (e.g. arcturus and aldebaran have no ce/pfp/me entries,
 * renoir has no mec2 entry) and some add extra rlc/mec variants.
 */
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
/* picasso: the usual six images plus an additional "_am4" rlc variant. */
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
/* raven2: the usual six images plus the "raven_kicker" rlc variant. */
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
/* arcturus: only mec and rlc images are listed. */
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
/* renoir: no mec2 image is listed. */
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
/* aldebaran: mec/mec2/rlc plus "sjt" variants of mec and mec2. */
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
131 
/*
 * Offset / base-index pairs for the six TCP_CHAN_STEER_n registers carrying
 * the _ARCT suffix (presumably the Arcturus variants - confirm).  Note that
 * the offsets are not contiguous: 0x0b03, 0x0b04, then 0x0b09..0x0b0c.
 */
132 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
133 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
135 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
137 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
139 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
141 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
143 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
144 
/*
 * Offset / base-index pairs for the upper and lower halves of
 * GOLDEN_TSC_COUNT carrying the _Renoir suffix; both use base index 1.
 */
145 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
146 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
147 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
148 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
149 
/*
 * Identifiers of the GFX RAS sub-blocks, numbered consecutively from 0.
 *
 * Sub-blocks are grouped per hardware unit.  Most groups are bracketed by
 * <UNIT>_INDEX_START / <UNIT>_INDEX_END markers that alias the first and last
 * member of the group, so the markers do not consume a value of their own.
 * SQC, TCC and EA are further split into numbered sub-ranges
 * (INDEXn_START / INDEXn_END) following the same aliasing scheme.
 * TA_RAS_BLOCK__GFX_MAX follows the last sub-block and therefore equals the
 * number of sub-blocks.
 *
 * The AMDGPU_RAS_SUB_BLOCK() macro stores these values in the ta_subblock
 * field of struct ras_gfx_subblock.  Because the values are positional, the
 * order of the enumerators must not be changed.
 */
150 enum ta_ras_gfx_subblock {
151 	/* CPC */
152 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
153 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPC_UCODE,
155 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
157 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
158 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
159 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
160 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
161 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
162 	/* CPF */
163 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
164 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
165 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
166 	TA_RAS_BLOCK__GFX_CPF_TAG,
167 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
168 	/* CPG */
169 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
170 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
171 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
172 	TA_RAS_BLOCK__GFX_CPG_TAG,
173 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
174 	/* GDS */
175 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
176 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
177 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
180 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
181 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
182 	/* SPI (single sub-block, no START/END markers) */
183 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
184 	/* SQ */
185 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
186 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
187 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
188 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
189 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
190 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
191 	/* SQC (3 ranges) */
192 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	/* SQC range 0 */
194 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
195 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
196 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
204 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
205 	/* SQC range 1 */
206 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
208 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
218 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
219 	/* SQC range 2 */
220 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
222 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
224 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
225 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
229 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
230 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
232 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
233 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
234 	/* TA */
235 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
236 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
239 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
240 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
241 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
242 	/* TCA */
243 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
244 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
245 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
246 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
247 	/* TCC (5 sub-ranges) */
248 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	/* TCC range 0 */
250 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
255 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
257 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
258 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
259 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
260 	/* TCC range 1 */
261 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
262 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
263 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
265 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
266 	/* TCC range 2 */
267 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
268 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
269 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
270 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
271 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
272 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
273 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
275 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
277 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
278 	/* TCC range 3 */
279 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
280 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
281 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
283 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
284 	/* TCC range 4 */
285 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
287 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
288 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
290 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
291 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
292 	/* TCI (single sub-block, no START/END markers) */
293 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
294 	/* TCP */
295 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
296 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
297 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
298 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
299 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
300 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
301 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
302 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
303 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
304 	/* TD */
305 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
306 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
307 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
308 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
309 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
310 	/* EA (3 sub-ranges) */
311 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	/* EA range 0 */
313 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
314 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
315 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
316 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
317 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
318 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
320 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
322 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
323 	/* EA range 1 */
324 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
325 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
326 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
327 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
328 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
329 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
330 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
331 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
332 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
333 	/* EA range 2 */
334 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
337 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
338 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
339 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
340 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
341 	/* UTC VM L2 bank */
342 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
343 	/* UTC VM walker */
344 	TA_RAS_BLOCK__UTC_VML2_WALKER,
345 	/* UTC ATC L2 2MB cache */
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
347 	/* UTC ATC L2 4KB cache */
348 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	/* One past the last sub-block: total number of sub-blocks. */
349 	TA_RAS_BLOCK__GFX_MAX
350 };
351 
/*
 * One entry of the ras_gfx_subblocks[] table.
 *
 * @name:                    sub-block name.  Entries are built by
 *                           AMDGPU_RAS_SUB_BLOCK(), which stores the
 *                           stringified sub-block identifier here, so the
 *                           field points at a string literal and is declared
 *                           const char * (not unsigned char *) to match the
 *                           literal's type and keep it read-only.
 * @ta_subblock:             corresponding TA_RAS_BLOCK__* value.
 * @hw_supported_error_type: bitmask assembled from the macro's first four
 *                           flag arguments.
 * @sw_supported_error_type: bitmask assembled from the macro's last four
 *                           flag arguments.
 */
struct ras_gfx_subblock {
	const char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
358 
/*
 * Build one designated array element of type struct ras_gfx_subblock.
 *
 * @subblock: sub-block identifier without prefix.  It is pasted onto
 *            AMDGPU_RAS_BLOCK__ to form the array index, onto TA_RAS_BLOCK__
 *            to form the ta_subblock value, and stringified for the name.
 * @a..@d:    0/1 flags packed into hw_supported_error_type as
 *            bit 0 = a, bit 1 = b, bit 2 = c, bit 3 = d.
 * @e..@h:    0/1 flags packed into sw_supported_error_type as
 *            bit 0 = g, bit 1 = e, bit 2 = h, bit 3 = f.
 *            Note the order differs from the a..d packing.
 *
 * The initialiser is positional, so it relies on the member order of
 * struct ras_gfx_subblock (name, ta_subblock, hw type, sw type).
 */
359 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
360 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
361 		#subblock,                                                     \
362 		TA_RAS_BLOCK__##subblock,                                      \
363 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
364 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
365 	}
366 
/*
 * Per-sub-block RAS capability table.
 *
 * Each element is placed at index AMDGPU_RAS_BLOCK__<name> by the
 * AMDGPU_RAS_SUB_BLOCK() designated initialiser, so the textual order of the
 * rows does not determine the array layout.  The eight numeric arguments are
 * 0/1 flags: the first four form hw_supported_error_type (bits 0..3 in
 * argument order), the last four form sw_supported_error_type (argument 5 ->
 * bit 1, argument 6 -> bit 3, argument 7 -> bit 0, argument 8 -> bit 2).
 */
367 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
368 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
396 			     0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
400 			     0, 0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
404 			     0, 0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
406 			     0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
408 			     1),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
410 			     0, 0, 0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
422 			     0, 0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
428 			     0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
440 			     0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
456 			     1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
473 			     0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
476 			     0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
478 			     0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
480 			     0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
515 };
516 
/*
 * Golden register settings table for GC 9.0.  Every row passes GC and 0 as
 * the first two macro arguments, followed by a register name and two 32-bit
 * constants.
 * NOTE(review): the two constants are presumably an AND mask and an OR value
 * applied to the register - confirm against SOC15_REG_GOLDEN_VALUE in
 * soc15.h.  Row order is kept as-is in case programming order matters.
 */
517 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
518 {
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
539 };
540 
/*
 * Golden register settings table for the "vg10" flavour of GC 9.0
 * (presumably Vega10 - confirm against the code that applies this table).
 * NOTE(review): the two constants per row are presumably an AND mask and an
 * OR value - confirm against SOC15_REG_GOLDEN_VALUE in soc15.h.
 */
541 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
542 {
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
561 };
562 
/*
 * Golden register settings table for the "vg20" flavour of GC 9.0
 * (presumably Vega20 - confirm against the code that applies this table).
 * NOTE(review): the two constants per row are presumably an AND mask and an
 * OR value - confirm against SOC15_REG_GOLDEN_VALUE in soc15.h.
 */
563 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
564 {
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
576 };
577 
/*
 * Golden register settings table for GC 9.1.  Every row passes GC and 0 as
 * the first two macro arguments, followed by a register name and two 32-bit
 * constants.
 * NOTE(review): the two constants are presumably an AND mask and an OR value
 * - confirm against SOC15_REG_GOLDEN_VALUE in soc15.h.
 */
578 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
579 {
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
604 };
605 
/*
 * Golden register settings table for the "rv1" flavour of GC 9.1
 * (presumably the first Raven variant - confirm against the code that
 * applies this table).
 * NOTE(review): the two constants per row are presumably an AND mask and an
 * OR value - confirm against SOC15_REG_GOLDEN_VALUE in soc15.h.
 */
606 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
607 {
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
615 };
616 
/*
 * Golden register settings table for the "rv2" flavour of GC 9.1
 * (presumably Raven2 - confirm against the code that applies this table).
 * NOTE(review): the two constants per row are presumably an AND mask and an
 * OR value - confirm against SOC15_REG_GOLDEN_VALUE in soc15.h.
 */
617 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
618 {
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
654 };
655 
656 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
657 {
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
661 };
662 
663 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
664 {
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
681 };
682 
683 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
684 {
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
698 };
699 
700 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
701 {
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
713 };
714 
715 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
716 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
717 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
718 };
719 
720 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
721 {
722 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 };
731 
732 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
733 {
734 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 };
743 
/* Per-ASIC golden values for the GB_ADDR_CONFIG register. */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN	0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN	0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN	0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN	0x26013041
748 
749 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
750 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
751 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
752 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
753 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
754 				struct amdgpu_cu_info *cu_info);
755 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
756 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
757 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
758 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
759 					  void *ras_error_status);
760 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
761 				     void *inject_if);
762 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
763 
764 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
765 				uint64_t queue_mask)
766 {
767 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
768 	amdgpu_ring_write(kiq_ring,
769 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
770 		/* vmid_mask:0* queue_type:0 (KIQ) */
771 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
772 	amdgpu_ring_write(kiq_ring,
773 			lower_32_bits(queue_mask));	/* queue mask lo */
774 	amdgpu_ring_write(kiq_ring,
775 			upper_32_bits(queue_mask));	/* queue mask hi */
776 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
777 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
778 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
779 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
780 }
781 
782 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
783 				 struct amdgpu_ring *ring)
784 {
785 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
786 	uint64_t wptr_addr = ring->wptr_gpu_addr;
787 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
788 
789 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
790 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
791 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
792 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
793 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
794 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
795 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
796 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
797 			 /*queue_type: normal compute queue */
798 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
799 			 /* alloc format: all_on_one_pipe */
800 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
801 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
802 			 /* num_queues: must be 1 */
803 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
804 	amdgpu_ring_write(kiq_ring,
805 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
806 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
807 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
808 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
809 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
810 }
811 
812 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
813 				   struct amdgpu_ring *ring,
814 				   enum amdgpu_unmap_queues_action action,
815 				   u64 gpu_addr, u64 seq)
816 {
817 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
818 
819 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
820 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
821 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
822 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
823 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
824 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
825 	amdgpu_ring_write(kiq_ring,
826 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
827 
828 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
829 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
830 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
831 		amdgpu_ring_write(kiq_ring, seq);
832 	} else {
833 		amdgpu_ring_write(kiq_ring, 0);
834 		amdgpu_ring_write(kiq_ring, 0);
835 		amdgpu_ring_write(kiq_ring, 0);
836 	}
837 }
838 
839 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
840 				   struct amdgpu_ring *ring,
841 				   u64 addr,
842 				   u64 seq)
843 {
844 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
845 
846 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
847 	amdgpu_ring_write(kiq_ring,
848 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
849 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
850 			  PACKET3_QUERY_STATUS_COMMAND(2));
851 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
852 	amdgpu_ring_write(kiq_ring,
853 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
854 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
855 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
856 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
857 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
858 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
859 }
860 
861 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
862 				uint16_t pasid, uint32_t flush_type,
863 				bool all_hub)
864 {
865 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
866 	amdgpu_ring_write(kiq_ring,
867 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
868 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
869 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
870 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
871 }
872 
873 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
874 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
875 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
876 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
877 	.kiq_query_status = gfx_v9_0_kiq_query_status,
878 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
879 	.set_resources_size = 8,
880 	.map_queues_size = 7,
881 	.unmap_queues_size = 6,
882 	.query_status_size = 7,
883 	.invalidate_tlbs_size = 2,
884 };
885 
886 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
887 {
888 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
889 }
890 
891 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
892 {
893 	switch (adev->ip_versions[GC_HWIP][0]) {
894 	case IP_VERSION(9, 0, 1):
895 		soc15_program_register_sequence(adev,
896 						golden_settings_gc_9_0,
897 						ARRAY_SIZE(golden_settings_gc_9_0));
898 		soc15_program_register_sequence(adev,
899 						golden_settings_gc_9_0_vg10,
900 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
901 		break;
902 	case IP_VERSION(9, 2, 1):
903 		soc15_program_register_sequence(adev,
904 						golden_settings_gc_9_2_1,
905 						ARRAY_SIZE(golden_settings_gc_9_2_1));
906 		soc15_program_register_sequence(adev,
907 						golden_settings_gc_9_2_1_vg12,
908 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
909 		break;
910 	case IP_VERSION(9, 4, 0):
911 		soc15_program_register_sequence(adev,
912 						golden_settings_gc_9_0,
913 						ARRAY_SIZE(golden_settings_gc_9_0));
914 		soc15_program_register_sequence(adev,
915 						golden_settings_gc_9_0_vg20,
916 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
917 		break;
918 	case IP_VERSION(9, 4, 1):
919 		soc15_program_register_sequence(adev,
920 						golden_settings_gc_9_4_1_arct,
921 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
922 		break;
923 	case IP_VERSION(9, 2, 2):
924 	case IP_VERSION(9, 1, 0):
925 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
926 						ARRAY_SIZE(golden_settings_gc_9_1));
927 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
928 			soc15_program_register_sequence(adev,
929 							golden_settings_gc_9_1_rv2,
930 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
931 		else
932 			soc15_program_register_sequence(adev,
933 							golden_settings_gc_9_1_rv1,
934 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
935 		break;
936 	 case IP_VERSION(9, 3, 0):
937 		soc15_program_register_sequence(adev,
938 						golden_settings_gc_9_1_rn,
939 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
940 		return; /* for renoir, don't need common goldensetting */
941 	case IP_VERSION(9, 4, 2):
942 		gfx_v9_4_2_init_golden_registers(adev,
943 						 adev->smuio.funcs->get_die_id(adev));
944 		break;
945 	default:
946 		break;
947 	}
948 
949 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
950 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
951 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
952 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
953 }
954 
955 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
956 				       bool wc, uint32_t reg, uint32_t val)
957 {
958 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
959 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
960 				WRITE_DATA_DST_SEL(0) |
961 				(wc ? WR_CONFIRM : 0));
962 	amdgpu_ring_write(ring, reg);
963 	amdgpu_ring_write(ring, 0);
964 	amdgpu_ring_write(ring, val);
965 }
966 
967 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
968 				  int mem_space, int opt, uint32_t addr0,
969 				  uint32_t addr1, uint32_t ref, uint32_t mask,
970 				  uint32_t inv)
971 {
972 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
973 	amdgpu_ring_write(ring,
974 				 /* memory (1) or register (0) */
975 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
976 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
977 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
978 				 WAIT_REG_MEM_ENGINE(eng_sel)));
979 
980 	if (mem_space)
981 		BUG_ON(addr0 & 0x3); /* Dword align */
982 	amdgpu_ring_write(ring, addr0);
983 	amdgpu_ring_write(ring, addr1);
984 	amdgpu_ring_write(ring, ref);
985 	amdgpu_ring_write(ring, mask);
986 	amdgpu_ring_write(ring, inv); /* poll interval */
987 }
988 
989 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
990 {
991 	struct amdgpu_device *adev = ring->adev;
992 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
993 	uint32_t tmp = 0;
994 	unsigned i;
995 	int r;
996 
997 	WREG32(scratch, 0xCAFEDEAD);
998 	r = amdgpu_ring_alloc(ring, 3);
999 	if (r)
1000 		return r;
1001 
1002 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1003 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1004 	amdgpu_ring_write(ring, 0xDEADBEEF);
1005 	amdgpu_ring_commit(ring);
1006 
1007 	for (i = 0; i < adev->usec_timeout; i++) {
1008 		tmp = RREG32(scratch);
1009 		if (tmp == 0xDEADBEEF)
1010 			break;
1011 		udelay(1);
1012 	}
1013 
1014 	if (i >= adev->usec_timeout)
1015 		r = -ETIMEDOUT;
1016 	return r;
1017 }
1018 
1019 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1020 {
1021 	struct amdgpu_device *adev = ring->adev;
1022 	struct amdgpu_ib ib;
1023 	struct dma_fence *f = NULL;
1024 
1025 	unsigned index;
1026 	uint64_t gpu_addr;
1027 	uint32_t tmp;
1028 	long r;
1029 
1030 	r = amdgpu_device_wb_get(adev, &index);
1031 	if (r)
1032 		return r;
1033 
1034 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1035 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1036 	memset(&ib, 0, sizeof(ib));
1037 	r = amdgpu_ib_get(adev, NULL, 16,
1038 					AMDGPU_IB_POOL_DIRECT, &ib);
1039 	if (r)
1040 		goto err1;
1041 
1042 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1043 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1044 	ib.ptr[2] = lower_32_bits(gpu_addr);
1045 	ib.ptr[3] = upper_32_bits(gpu_addr);
1046 	ib.ptr[4] = 0xDEADBEEF;
1047 	ib.length_dw = 5;
1048 
1049 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1050 	if (r)
1051 		goto err2;
1052 
1053 	r = dma_fence_wait_timeout(f, false, timeout);
1054 	if (r == 0) {
1055 		r = -ETIMEDOUT;
1056 		goto err2;
1057 	} else if (r < 0) {
1058 		goto err2;
1059 	}
1060 
1061 	tmp = adev->wb.wb[index];
1062 	if (tmp == 0xDEADBEEF)
1063 		r = 0;
1064 	else
1065 		r = -EINVAL;
1066 
1067 err2:
1068 	amdgpu_ib_free(adev, &ib, NULL);
1069 	dma_fence_put(f);
1070 err1:
1071 	amdgpu_device_wb_free(adev, index);
1072 	return r;
1073 }
1074 
1075 
1076 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1077 {
1078 	release_firmware(adev->gfx.pfp_fw);
1079 	adev->gfx.pfp_fw = NULL;
1080 	release_firmware(adev->gfx.me_fw);
1081 	adev->gfx.me_fw = NULL;
1082 	release_firmware(adev->gfx.ce_fw);
1083 	adev->gfx.ce_fw = NULL;
1084 	release_firmware(adev->gfx.rlc_fw);
1085 	adev->gfx.rlc_fw = NULL;
1086 	release_firmware(adev->gfx.mec_fw);
1087 	adev->gfx.mec_fw = NULL;
1088 	release_firmware(adev->gfx.mec2_fw);
1089 	adev->gfx.mec2_fw = NULL;
1090 
1091 	kfree(adev->gfx.rlc.register_list_format);
1092 }
1093 
1094 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1095 {
1096 	adev->gfx.me_fw_write_wait = false;
1097 	adev->gfx.mec_fw_write_wait = false;
1098 
1099 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1100 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1101 	    (adev->gfx.mec_feature_version < 46) ||
1102 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1103 	    (adev->gfx.pfp_feature_version < 46)))
1104 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1105 
1106 	switch (adev->ip_versions[GC_HWIP][0]) {
1107 	case IP_VERSION(9, 0, 1):
1108 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1109 		    (adev->gfx.me_feature_version >= 42) &&
1110 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1111 		    (adev->gfx.pfp_feature_version >= 42))
1112 			adev->gfx.me_fw_write_wait = true;
1113 
1114 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1115 		    (adev->gfx.mec_feature_version >= 42))
1116 			adev->gfx.mec_fw_write_wait = true;
1117 		break;
1118 	case IP_VERSION(9, 2, 1):
1119 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1120 		    (adev->gfx.me_feature_version >= 44) &&
1121 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1122 		    (adev->gfx.pfp_feature_version >= 44))
1123 			adev->gfx.me_fw_write_wait = true;
1124 
1125 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1126 		    (adev->gfx.mec_feature_version >= 44))
1127 			adev->gfx.mec_fw_write_wait = true;
1128 		break;
1129 	case IP_VERSION(9, 4, 0):
1130 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1131 		    (adev->gfx.me_feature_version >= 44) &&
1132 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1133 		    (adev->gfx.pfp_feature_version >= 44))
1134 			adev->gfx.me_fw_write_wait = true;
1135 
1136 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1137 		    (adev->gfx.mec_feature_version >= 44))
1138 			adev->gfx.mec_fw_write_wait = true;
1139 		break;
1140 	case IP_VERSION(9, 1, 0):
1141 	case IP_VERSION(9, 2, 2):
1142 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1143 		    (adev->gfx.me_feature_version >= 42) &&
1144 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1145 		    (adev->gfx.pfp_feature_version >= 42))
1146 			adev->gfx.me_fw_write_wait = true;
1147 
1148 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1149 		    (adev->gfx.mec_feature_version >= 42))
1150 			adev->gfx.mec_fw_write_wait = true;
1151 		break;
1152 	default:
1153 		adev->gfx.me_fw_write_wait = true;
1154 		adev->gfx.mec_fw_write_wait = true;
1155 		break;
1156 	}
1157 }
1158 
1159 struct amdgpu_gfxoff_quirk {
1160 	u16 chip_vendor;
1161 	u16 chip_device;
1162 	u16 subsys_vendor;
1163 	u16 subsys_device;
1164 	u8 revision;
1165 };
1166 
1167 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1168 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1169 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1170 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1171 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1172 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1173 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1174 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1175 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1176 	{ 0, 0, 0, 0, 0 },
1177 };
1178 
1179 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1180 {
1181 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1182 
1183 	while (p && p->chip_device != 0) {
1184 		if (pdev->vendor == p->chip_vendor &&
1185 		    pdev->device == p->chip_device &&
1186 		    pdev->subsystem_vendor == p->subsys_vendor &&
1187 		    pdev->subsystem_device == p->subsys_device &&
1188 		    pdev->revision == p->revision) {
1189 			return true;
1190 		}
1191 		++p;
1192 	}
1193 	return false;
1194 }
1195 
1196 static bool is_raven_kicker(struct amdgpu_device *adev)
1197 {
1198 	if (adev->pm.fw_version >= 0x41e2b)
1199 		return true;
1200 	else
1201 		return false;
1202 }
1203 
1204 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1205 {
1206 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1207 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1208 	    (adev->gfx.me_feature_version >= 52))
1209 		return true;
1210 	else
1211 		return false;
1212 }
1213 
1214 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1215 {
1216 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1217 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1218 
1219 	switch (adev->ip_versions[GC_HWIP][0]) {
1220 	case IP_VERSION(9, 0, 1):
1221 	case IP_VERSION(9, 2, 1):
1222 	case IP_VERSION(9, 4, 0):
1223 		break;
1224 	case IP_VERSION(9, 2, 2):
1225 	case IP_VERSION(9, 1, 0):
1226 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1227 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1228 		    ((!is_raven_kicker(adev) &&
1229 		      adev->gfx.rlc_fw_version < 531) ||
1230 		     (adev->gfx.rlc_feature_version < 1) ||
1231 		     !adev->gfx.rlc.is_rlc_v2_1))
1232 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1233 
1234 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1235 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1236 				AMD_PG_SUPPORT_CP |
1237 				AMD_PG_SUPPORT_RLC_SMU_HS;
1238 		break;
1239 	case IP_VERSION(9, 3, 0):
1240 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1241 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1242 				AMD_PG_SUPPORT_CP |
1243 				AMD_PG_SUPPORT_RLC_SMU_HS;
1244 		break;
1245 	default:
1246 		break;
1247 	}
1248 }
1249 
1250 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1251 					  const char *chip_name)
1252 {
1253 	char fw_name[30];
1254 	int err;
1255 
1256 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1257 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1258 	if (err)
1259 		goto out;
1260 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1261 	if (err)
1262 		goto out;
1263 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1264 
1265 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1266 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1267 	if (err)
1268 		goto out;
1269 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1270 	if (err)
1271 		goto out;
1272 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1273 
1274 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1275 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1276 	if (err)
1277 		goto out;
1278 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1279 	if (err)
1280 		goto out;
1281 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1282 
1283 out:
1284 	if (err) {
1285 		dev_err(adev->dev,
1286 			"gfx9: Failed to init firmware \"%s\"\n",
1287 			fw_name);
1288 		release_firmware(adev->gfx.pfp_fw);
1289 		adev->gfx.pfp_fw = NULL;
1290 		release_firmware(adev->gfx.me_fw);
1291 		adev->gfx.me_fw = NULL;
1292 		release_firmware(adev->gfx.ce_fw);
1293 		adev->gfx.ce_fw = NULL;
1294 	}
1295 	return err;
1296 }
1297 
1298 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1299 					  const char *chip_name)
1300 {
1301 	char fw_name[30];
1302 	int err;
1303 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1304 	uint16_t version_major;
1305 	uint16_t version_minor;
1306 	uint32_t smu_version;
1307 
1308 	/*
1309 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1310 	 * instead of picasso_rlc.bin.
1311 	 * Judgment method:
1312 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1313 	 *          or revision >= 0xD8 && revision <= 0xDF
1314 	 * otherwise is PCO FP5
1315 	 */
1316 	if (!strcmp(chip_name, "picasso") &&
1317 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1318 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1319 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1320 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1321 		(smu_version >= 0x41e2b))
1322 		/**
1323 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1324 		*/
1325 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1326 	else
1327 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1328 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1329 	if (err)
1330 		goto out;
1331 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1332 	if (err)
1333 		goto out;
1334 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1335 
1336 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1337 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1338 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1339 out:
1340 	if (err) {
1341 		dev_err(adev->dev,
1342 			"gfx9: Failed to init firmware \"%s\"\n",
1343 			fw_name);
1344 		release_firmware(adev->gfx.rlc_fw);
1345 		adev->gfx.rlc_fw = NULL;
1346 	}
1347 	return err;
1348 }
1349 
1350 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1351 {
1352 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1353 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1354 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1355 		return false;
1356 
1357 	return true;
1358 }
1359 
1360 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1361 					  const char *chip_name)
1362 {
1363 	char fw_name[30];
1364 	int err;
1365 
1366 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1367 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1368 	else
1369 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1370 
1371 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1372 	if (err)
1373 		goto out;
1374 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1375 	if (err)
1376 		goto out;
1377 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1378 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1379 
1380 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1381 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1382 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1383 		else
1384 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1385 
1386 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1387 		if (!err) {
1388 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1389 			if (err)
1390 				goto out;
1391 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1392 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1393 		} else {
1394 			err = 0;
1395 			adev->gfx.mec2_fw = NULL;
1396 		}
1397 	} else {
1398 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1399 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1400 	}
1401 
1402 out:
1403 	gfx_v9_0_check_if_need_gfxoff(adev);
1404 	gfx_v9_0_check_fw_write_wait(adev);
1405 	if (err) {
1406 		dev_err(adev->dev,
1407 			"gfx9: Failed to init firmware \"%s\"\n",
1408 			fw_name);
1409 		release_firmware(adev->gfx.mec_fw);
1410 		adev->gfx.mec_fw = NULL;
1411 		release_firmware(adev->gfx.mec2_fw);
1412 		adev->gfx.mec2_fw = NULL;
1413 	}
1414 	return err;
1415 }
1416 
1417 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1418 {
1419 	const char *chip_name;
1420 	int r;
1421 
1422 	DRM_DEBUG("\n");
1423 
1424 	switch (adev->ip_versions[GC_HWIP][0]) {
1425 	case IP_VERSION(9, 0, 1):
1426 		chip_name = "vega10";
1427 		break;
1428 	case IP_VERSION(9, 2, 1):
1429 		chip_name = "vega12";
1430 		break;
1431 	case IP_VERSION(9, 4, 0):
1432 		chip_name = "vega20";
1433 		break;
1434 	case IP_VERSION(9, 2, 2):
1435 	case IP_VERSION(9, 1, 0):
1436 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1437 			chip_name = "raven2";
1438 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1439 			chip_name = "picasso";
1440 		else
1441 			chip_name = "raven";
1442 		break;
1443 	case IP_VERSION(9, 4, 1):
1444 		chip_name = "arcturus";
1445 		break;
1446 	case IP_VERSION(9, 3, 0):
1447 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1448 			chip_name = "renoir";
1449 		else
1450 			chip_name = "green_sardine";
1451 		break;
1452 	case IP_VERSION(9, 4, 2):
1453 		chip_name = "aldebaran";
1454 		break;
1455 	default:
1456 		BUG();
1457 	}
1458 
1459 	/* No CPG in Arcturus */
1460 	if (adev->gfx.num_gfx_rings) {
1461 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1462 		if (r)
1463 			return r;
1464 	}
1465 
1466 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1467 	if (r)
1468 		return r;
1469 
1470 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1471 	if (r)
1472 		return r;
1473 
1474 	return r;
1475 }
1476 
1477 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1478 {
1479 	u32 count = 0;
1480 	const struct cs_section_def *sect = NULL;
1481 	const struct cs_extent_def *ext = NULL;
1482 
1483 	/* begin clear state */
1484 	count += 2;
1485 	/* context control state */
1486 	count += 3;
1487 
1488 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1489 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1490 			if (sect->id == SECT_CONTEXT)
1491 				count += 2 + ext->reg_count;
1492 			else
1493 				return 0;
1494 		}
1495 	}
1496 
1497 	/* end clear state */
1498 	count += 2;
1499 	/* clear state */
1500 	count += 2;
1501 
1502 	return count;
1503 }
1504 
1505 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1506 				    volatile u32 *buffer)
1507 {
1508 	u32 count = 0, i;
1509 	const struct cs_section_def *sect = NULL;
1510 	const struct cs_extent_def *ext = NULL;
1511 
1512 	if (adev->gfx.rlc.cs_data == NULL)
1513 		return;
1514 	if (buffer == NULL)
1515 		return;
1516 
1517 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1518 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1519 
1520 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1521 	buffer[count++] = cpu_to_le32(0x80000000);
1522 	buffer[count++] = cpu_to_le32(0x80000000);
1523 
1524 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1525 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1526 			if (sect->id == SECT_CONTEXT) {
1527 				buffer[count++] =
1528 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1529 				buffer[count++] = cpu_to_le32(ext->reg_index -
1530 						PACKET3_SET_CONTEXT_REG_START);
1531 				for (i = 0; i < ext->reg_count; i++)
1532 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1533 			} else {
1534 				return;
1535 			}
1536 		}
1537 	}
1538 
1539 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1540 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1541 
1542 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1543 	buffer[count++] = cpu_to_le32(0);
1544 }
1545 
1546 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1547 {
1548 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1549 	uint32_t pg_always_on_cu_num = 2;
1550 	uint32_t always_on_cu_num;
1551 	uint32_t i, j, k;
1552 	uint32_t mask, cu_bitmap, counter;
1553 
1554 	if (adev->flags & AMD_IS_APU)
1555 		always_on_cu_num = 4;
1556 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1557 		always_on_cu_num = 8;
1558 	else
1559 		always_on_cu_num = 12;
1560 
1561 	mutex_lock(&adev->grbm_idx_mutex);
1562 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1563 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1564 			mask = 1;
1565 			cu_bitmap = 0;
1566 			counter = 0;
1567 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
1568 
1569 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1570 				if (cu_info->bitmap[i][j] & mask) {
1571 					if (counter == pg_always_on_cu_num)
1572 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1573 					if (counter < always_on_cu_num)
1574 						cu_bitmap |= mask;
1575 					else
1576 						break;
1577 					counter++;
1578 				}
1579 				mask <<= 1;
1580 			}
1581 
1582 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1583 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1584 		}
1585 	}
1586 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1587 	mutex_unlock(&adev->grbm_idx_mutex);
1588 }
1589 
1590 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1591 {
1592 	uint32_t data;
1593 
1594 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1595 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1596 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1597 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1598 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1599 
1600 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1601 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1602 
1603 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1604 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1605 
1606 	mutex_lock(&adev->grbm_idx_mutex);
1607 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1608 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1609 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1610 
1611 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1612 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1613 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1614 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1615 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1616 
1617 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1618 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1619 	data &= 0x0000FFFF;
1620 	data |= 0x00C00000;
1621 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1622 
1623 	/*
1624 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1625 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1626 	 */
1627 
1628 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1629 	 * but used for RLC_LB_CNTL configuration */
1630 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1631 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1632 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1633 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1634 	mutex_unlock(&adev->grbm_idx_mutex);
1635 
1636 	gfx_v9_0_init_always_on_cu_mask(adev);
1637 }
1638 
1639 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1640 {
1641 	uint32_t data;
1642 
1643 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1644 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1645 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1646 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1647 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1648 
1649 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1650 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1651 
1652 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1653 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1654 
1655 	mutex_lock(&adev->grbm_idx_mutex);
1656 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1657 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1658 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1659 
1660 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1661 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1662 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1663 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1664 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1665 
1666 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1667 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1668 	data &= 0x0000FFFF;
1669 	data |= 0x00C00000;
1670 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1671 
1672 	/*
1673 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1674 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1675 	 */
1676 
1677 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1678 	 * but used for RLC_LB_CNTL configuration */
1679 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1680 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1681 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1682 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1683 	mutex_unlock(&adev->grbm_idx_mutex);
1684 
1685 	gfx_v9_0_init_always_on_cu_mask(adev);
1686 }
1687 
1688 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1689 {
1690 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1691 }
1692 
/*
 * Number of CP jump tables: 5 when a separate MEC2 firmware binary is
 * supported on this device, 4 otherwise.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1700 
1701 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1702 {
1703 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1704 
1705 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1706 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1707 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1708 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1709 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1710 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1711 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1712 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1713 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1714 }
1715 
1716 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1717 {
1718 	const struct cs_section_def *cs_data;
1719 	int r;
1720 
1721 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1722 
1723 	cs_data = adev->gfx.rlc.cs_data;
1724 
1725 	if (cs_data) {
1726 		/* init clear state block */
1727 		r = amdgpu_gfx_rlc_init_csb(adev);
1728 		if (r)
1729 			return r;
1730 	}
1731 
1732 	if (adev->flags & AMD_IS_APU) {
1733 		/* TODO: double check the cp_table_size for RV */
1734 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1735 		r = amdgpu_gfx_rlc_init_cpt(adev);
1736 		if (r)
1737 			return r;
1738 	}
1739 
1740 	switch (adev->ip_versions[GC_HWIP][0]) {
1741 	case IP_VERSION(9, 2, 2):
1742 	case IP_VERSION(9, 1, 0):
1743 		gfx_v9_0_init_lbpw(adev);
1744 		break;
1745 	case IP_VERSION(9, 4, 0):
1746 		gfx_v9_4_init_lbpw(adev);
1747 		break;
1748 	default:
1749 		break;
1750 	}
1751 
1752 	/* init spm vmid with 0xf */
1753 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1754 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1755 
1756 	return 0;
1757 }
1758 
1759 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1760 {
1761 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1762 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1763 }
1764 
1765 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1766 {
1767 	int r;
1768 	u32 *hpd;
1769 	const __le32 *fw_data;
1770 	unsigned fw_size;
1771 	u32 *fw;
1772 	size_t mec_hpd_size;
1773 
1774 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1775 
1776 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1777 
1778 	/* take ownership of the relevant compute queues */
1779 	amdgpu_gfx_compute_queue_acquire(adev);
1780 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1781 	if (mec_hpd_size) {
1782 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1783 					      AMDGPU_GEM_DOMAIN_VRAM,
1784 					      &adev->gfx.mec.hpd_eop_obj,
1785 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1786 					      (void **)&hpd);
1787 		if (r) {
1788 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1789 			gfx_v9_0_mec_fini(adev);
1790 			return r;
1791 		}
1792 
1793 		memset(hpd, 0, mec_hpd_size);
1794 
1795 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1796 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1797 	}
1798 
1799 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1800 
1801 	fw_data = (const __le32 *)
1802 		(adev->gfx.mec_fw->data +
1803 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1804 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1805 
1806 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1807 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1808 				      &adev->gfx.mec.mec_fw_obj,
1809 				      &adev->gfx.mec.mec_fw_gpu_addr,
1810 				      (void **)&fw);
1811 	if (r) {
1812 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1813 		gfx_v9_0_mec_fini(adev);
1814 		return r;
1815 	}
1816 
1817 	memcpy(fw, fw_data, fw_size);
1818 
1819 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1820 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1821 
1822 	return 0;
1823 }
1824 
1825 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1826 {
1827 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1828 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1829 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1830 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1831 		(SQ_IND_INDEX__FORCE_READ_MASK));
1832 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1833 }
1834 
1835 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1836 			   uint32_t wave, uint32_t thread,
1837 			   uint32_t regno, uint32_t num, uint32_t *out)
1838 {
1839 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1840 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1841 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1842 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1843 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1844 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1845 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1846 	while (num--)
1847 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1848 }
1849 
1850 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1851 {
1852 	/* type 1 wave data */
1853 	dst[(*no_fields)++] = 1;
1854 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1855 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1856 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1857 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1858 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1859 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1860 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1861 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1862 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1863 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1864 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1865 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1866 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1867 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1868 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1869 }
1870 
1871 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1872 				     uint32_t wave, uint32_t start,
1873 				     uint32_t size, uint32_t *dst)
1874 {
1875 	wave_read_regs(
1876 		adev, simd, wave, 0,
1877 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1878 }
1879 
1880 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1881 				     uint32_t wave, uint32_t thread,
1882 				     uint32_t start, uint32_t size,
1883 				     uint32_t *dst)
1884 {
1885 	wave_read_regs(
1886 		adev, simd, wave, thread,
1887 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1888 }
1889 
1890 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1891 				  u32 me, u32 pipe, u32 q, u32 vm)
1892 {
1893 	soc15_grbm_select(adev, me, pipe, q, vm);
1894 }
1895 
1896 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1897         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1898         .select_se_sh = &gfx_v9_0_select_se_sh,
1899         .read_wave_data = &gfx_v9_0_read_wave_data,
1900         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1901         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1902         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1903 };
1904 
1905 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1906 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1907 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1908 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1909 };
1910 
1911 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1912 	.ras_block = {
1913 		.hw_ops = &gfx_v9_0_ras_ops,
1914 	},
1915 };
1916 
1917 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1918 {
1919 	u32 gb_addr_config;
1920 	int err;
1921 
1922 	switch (adev->ip_versions[GC_HWIP][0]) {
1923 	case IP_VERSION(9, 0, 1):
1924 		adev->gfx.config.max_hw_contexts = 8;
1925 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1926 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1927 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1928 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1929 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1930 		break;
1931 	case IP_VERSION(9, 2, 1):
1932 		adev->gfx.config.max_hw_contexts = 8;
1933 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1934 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1935 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1936 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1937 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1938 		DRM_INFO("fix gfx.config for vega12\n");
1939 		break;
1940 	case IP_VERSION(9, 4, 0):
1941 		adev->gfx.ras = &gfx_v9_0_ras;
1942 		adev->gfx.config.max_hw_contexts = 8;
1943 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1944 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1945 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1946 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1947 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1948 		gb_addr_config &= ~0xf3e777ff;
1949 		gb_addr_config |= 0x22014042;
1950 		/* check vbios table if gpu info is not available */
1951 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1952 		if (err)
1953 			return err;
1954 		break;
1955 	case IP_VERSION(9, 2, 2):
1956 	case IP_VERSION(9, 1, 0):
1957 		adev->gfx.config.max_hw_contexts = 8;
1958 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1959 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1960 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1961 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1962 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1963 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1964 		else
1965 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1966 		break;
1967 	case IP_VERSION(9, 4, 1):
1968 		adev->gfx.ras = &gfx_v9_4_ras;
1969 		adev->gfx.config.max_hw_contexts = 8;
1970 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1975 		gb_addr_config &= ~0xf3e777ff;
1976 		gb_addr_config |= 0x22014042;
1977 		break;
1978 	case IP_VERSION(9, 3, 0):
1979 		adev->gfx.config.max_hw_contexts = 8;
1980 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1981 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1982 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1983 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1984 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1985 		gb_addr_config &= ~0xf3e777ff;
1986 		gb_addr_config |= 0x22010042;
1987 		break;
1988 	case IP_VERSION(9, 4, 2):
1989 		adev->gfx.ras = &gfx_v9_4_2_ras;
1990 		adev->gfx.config.max_hw_contexts = 8;
1991 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1992 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1993 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1994 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1995 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1996 		gb_addr_config &= ~0xf3e777ff;
1997 		gb_addr_config |= 0x22014042;
1998 		/* check vbios table if gpu info is not available */
1999 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2000 		if (err)
2001 			return err;
2002 		break;
2003 	default:
2004 		BUG();
2005 		break;
2006 	}
2007 
2008 	if (adev->gfx.ras) {
2009 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2010 		if (err) {
2011 			DRM_ERROR("Failed to register gfx ras block!\n");
2012 			return err;
2013 		}
2014 
2015 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2016 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2017 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2018 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2019 
2020 		/* If not define special ras_late_init function, use gfx default ras_late_init */
2021 		if (!adev->gfx.ras->ras_block.ras_late_init)
2022 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2023 
2024 		/* If not defined special ras_cb function, use default ras_cb */
2025 		if (!adev->gfx.ras->ras_block.ras_cb)
2026 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2027 	}
2028 
2029 	adev->gfx.config.gb_addr_config = gb_addr_config;
2030 
2031 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2032 			REG_GET_FIELD(
2033 					adev->gfx.config.gb_addr_config,
2034 					GB_ADDR_CONFIG,
2035 					NUM_PIPES);
2036 
2037 	adev->gfx.config.max_tile_pipes =
2038 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2039 
2040 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2041 			REG_GET_FIELD(
2042 					adev->gfx.config.gb_addr_config,
2043 					GB_ADDR_CONFIG,
2044 					NUM_BANKS);
2045 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2046 			REG_GET_FIELD(
2047 					adev->gfx.config.gb_addr_config,
2048 					GB_ADDR_CONFIG,
2049 					MAX_COMPRESSED_FRAGS);
2050 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2051 			REG_GET_FIELD(
2052 					adev->gfx.config.gb_addr_config,
2053 					GB_ADDR_CONFIG,
2054 					NUM_RB_PER_SE);
2055 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2056 			REG_GET_FIELD(
2057 					adev->gfx.config.gb_addr_config,
2058 					GB_ADDR_CONFIG,
2059 					NUM_SHADER_ENGINES);
2060 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2061 			REG_GET_FIELD(
2062 					adev->gfx.config.gb_addr_config,
2063 					GB_ADDR_CONFIG,
2064 					PIPE_INTERLEAVE_SIZE));
2065 
2066 	return 0;
2067 }
2068 
2069 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2070 				      int mec, int pipe, int queue)
2071 {
2072 	unsigned irq_type;
2073 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2074 	unsigned int hw_prio;
2075 
2076 	ring = &adev->gfx.compute_ring[ring_id];
2077 
2078 	/* mec0 is me1 */
2079 	ring->me = mec + 1;
2080 	ring->pipe = pipe;
2081 	ring->queue = queue;
2082 
2083 	ring->ring_obj = NULL;
2084 	ring->use_doorbell = true;
2085 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2086 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2087 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2088 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2089 
2090 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2091 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2092 		+ ring->pipe;
2093 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2094 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2095 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2096 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2097 				hw_prio, NULL);
2098 }
2099 
2100 static int gfx_v9_0_sw_init(void *handle)
2101 {
2102 	int i, j, k, r, ring_id;
2103 	struct amdgpu_ring *ring;
2104 	struct amdgpu_kiq *kiq;
2105 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2106 
2107 	switch (adev->ip_versions[GC_HWIP][0]) {
2108 	case IP_VERSION(9, 0, 1):
2109 	case IP_VERSION(9, 2, 1):
2110 	case IP_VERSION(9, 4, 0):
2111 	case IP_VERSION(9, 2, 2):
2112 	case IP_VERSION(9, 1, 0):
2113 	case IP_VERSION(9, 4, 1):
2114 	case IP_VERSION(9, 3, 0):
2115 	case IP_VERSION(9, 4, 2):
2116 		adev->gfx.mec.num_mec = 2;
2117 		break;
2118 	default:
2119 		adev->gfx.mec.num_mec = 1;
2120 		break;
2121 	}
2122 
2123 	adev->gfx.mec.num_pipe_per_mec = 4;
2124 	adev->gfx.mec.num_queue_per_pipe = 8;
2125 
2126 	/* EOP Event */
2127 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2128 	if (r)
2129 		return r;
2130 
2131 	/* Privileged reg */
2132 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2133 			      &adev->gfx.priv_reg_irq);
2134 	if (r)
2135 		return r;
2136 
2137 	/* Privileged inst */
2138 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2139 			      &adev->gfx.priv_inst_irq);
2140 	if (r)
2141 		return r;
2142 
2143 	/* ECC error */
2144 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2145 			      &adev->gfx.cp_ecc_error_irq);
2146 	if (r)
2147 		return r;
2148 
2149 	/* FUE error */
2150 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2151 			      &adev->gfx.cp_ecc_error_irq);
2152 	if (r)
2153 		return r;
2154 
2155 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2156 
2157 	r = gfx_v9_0_init_microcode(adev);
2158 	if (r) {
2159 		DRM_ERROR("Failed to load gfx firmware!\n");
2160 		return r;
2161 	}
2162 
2163 	if (adev->gfx.rlc.funcs) {
2164 		if (adev->gfx.rlc.funcs->init) {
2165 			r = adev->gfx.rlc.funcs->init(adev);
2166 			if (r) {
2167 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2168 				return r;
2169 			}
2170 		}
2171 	}
2172 
2173 	r = gfx_v9_0_mec_init(adev);
2174 	if (r) {
2175 		DRM_ERROR("Failed to init MEC BOs!\n");
2176 		return r;
2177 	}
2178 
2179 	/* set up the gfx ring */
2180 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2181 		ring = &adev->gfx.gfx_ring[i];
2182 		ring->ring_obj = NULL;
2183 		if (!i)
2184 			sprintf(ring->name, "gfx");
2185 		else
2186 			sprintf(ring->name, "gfx_%d", i);
2187 		ring->use_doorbell = true;
2188 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2189 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2190 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2191 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2192 		if (r)
2193 			return r;
2194 	}
2195 
2196 	/* set up the compute queues - allocate horizontally across pipes */
2197 	ring_id = 0;
2198 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2199 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2200 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2201 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2202 					continue;
2203 
2204 				r = gfx_v9_0_compute_ring_init(adev,
2205 							       ring_id,
2206 							       i, k, j);
2207 				if (r)
2208 					return r;
2209 
2210 				ring_id++;
2211 			}
2212 		}
2213 	}
2214 
2215 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2216 	if (r) {
2217 		DRM_ERROR("Failed to init KIQ BOs!\n");
2218 		return r;
2219 	}
2220 
2221 	kiq = &adev->gfx.kiq;
2222 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2223 	if (r)
2224 		return r;
2225 
2226 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2227 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2228 	if (r)
2229 		return r;
2230 
2231 	adev->gfx.ce_ram_size = 0x8000;
2232 
2233 	r = gfx_v9_0_gpu_early_init(adev);
2234 	if (r)
2235 		return r;
2236 
2237 	return 0;
2238 }
2239 
2240 
2241 static int gfx_v9_0_sw_fini(void *handle)
2242 {
2243 	int i;
2244 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2245 
2246 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2247 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2248 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2249 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2250 
2251 	amdgpu_gfx_mqd_sw_fini(adev);
2252 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2253 	amdgpu_gfx_kiq_fini(adev);
2254 
2255 	gfx_v9_0_mec_fini(adev);
2256 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2257 				&adev->gfx.rlc.clear_state_gpu_addr,
2258 				(void **)&adev->gfx.rlc.cs_ptr);
2259 	if (adev->flags & AMD_IS_APU) {
2260 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2261 				&adev->gfx.rlc.cp_table_gpu_addr,
2262 				(void **)&adev->gfx.rlc.cp_table_ptr);
2263 	}
2264 	gfx_v9_0_free_microcode(adev);
2265 
2266 	return 0;
2267 }
2268 
2269 
/* Placeholder: no tiling mode table is programmed for GFX v9 yet. */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2274 
2275 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2276 			   u32 instance)
2277 {
2278 	u32 data;
2279 
2280 	if (instance == 0xffffffff)
2281 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2282 	else
2283 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2284 
2285 	if (se_num == 0xffffffff)
2286 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2287 	else
2288 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2289 
2290 	if (sh_num == 0xffffffff)
2291 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2292 	else
2293 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2294 
2295 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2296 }
2297 
2298 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2299 {
2300 	u32 data, mask;
2301 
2302 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2303 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2304 
2305 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2306 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2307 
2308 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2309 					 adev->gfx.config.max_sh_per_se);
2310 
2311 	return (~data) & mask;
2312 }
2313 
2314 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2315 {
2316 	int i, j;
2317 	u32 data;
2318 	u32 active_rbs = 0;
2319 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2320 					adev->gfx.config.max_sh_per_se;
2321 
2322 	mutex_lock(&adev->grbm_idx_mutex);
2323 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2324 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2325 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2326 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2327 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2328 					       rb_bitmap_width_per_sh);
2329 		}
2330 	}
2331 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2332 	mutex_unlock(&adev->grbm_idx_mutex);
2333 
2334 	adev->gfx.config.backend_enable_mask = active_rbs;
2335 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2336 }
2337 
2338 #define DEFAULT_SH_MEM_BASES	(0x6000)
2339 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2340 {
2341 	int i;
2342 	uint32_t sh_mem_config;
2343 	uint32_t sh_mem_bases;
2344 
2345 	/*
2346 	 * Configure apertures:
2347 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2348 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2349 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2350 	 */
2351 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2352 
2353 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2354 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2355 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2356 
2357 	mutex_lock(&adev->srbm_mutex);
2358 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2359 		soc15_grbm_select(adev, 0, 0, 0, i);
2360 		/* CP and shaders */
2361 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2362 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2363 	}
2364 	soc15_grbm_select(adev, 0, 0, 0, 0);
2365 	mutex_unlock(&adev->srbm_mutex);
2366 
2367 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2368 	   access. These should be enabled by FW for target VMIDs. */
2369 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2370 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2371 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2372 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2373 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2374 	}
2375 }
2376 
2377 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2378 {
2379 	int vmid;
2380 
2381 	/*
2382 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2383 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2384 	 * the driver can enable them for graphics. VMID0 should maintain
2385 	 * access so that HWS firmware can save/restore entries.
2386 	 */
2387 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2388 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2389 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2390 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2391 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2392 	}
2393 }
2394 
2395 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2396 {
2397 	uint32_t tmp;
2398 
2399 	switch (adev->ip_versions[GC_HWIP][0]) {
2400 	case IP_VERSION(9, 4, 1):
2401 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2402 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2403 					DISABLE_BARRIER_WAITCNT, 1);
2404 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2405 		break;
2406 	default:
2407 		break;
2408 	}
2409 }
2410 
2411 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2412 {
2413 	u32 tmp;
2414 	int i;
2415 
2416 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2417 
2418 	gfx_v9_0_tiling_mode_table_init(adev);
2419 
2420 	if (adev->gfx.num_gfx_rings)
2421 		gfx_v9_0_setup_rb(adev);
2422 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2423 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2424 
2425 	/* XXX SH_MEM regs */
2426 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2427 	mutex_lock(&adev->srbm_mutex);
2428 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2429 		soc15_grbm_select(adev, 0, 0, 0, i);
2430 		/* CP and shaders */
2431 		if (i == 0) {
2432 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2433 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2434 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2435 					    !!adev->gmc.noretry);
2436 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2437 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2438 		} else {
2439 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2440 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2441 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2442 					    !!adev->gmc.noretry);
2443 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2444 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2445 				(adev->gmc.private_aperture_start >> 48));
2446 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2447 				(adev->gmc.shared_aperture_start >> 48));
2448 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2449 		}
2450 	}
2451 	soc15_grbm_select(adev, 0, 0, 0, 0);
2452 
2453 	mutex_unlock(&adev->srbm_mutex);
2454 
2455 	gfx_v9_0_init_compute_vmid(adev);
2456 	gfx_v9_0_init_gds_vmid(adev);
2457 	gfx_v9_0_init_sq_config(adev);
2458 }
2459 
2460 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2461 {
2462 	u32 i, j, k;
2463 	u32 mask;
2464 
2465 	mutex_lock(&adev->grbm_idx_mutex);
2466 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2467 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2468 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2469 			for (k = 0; k < adev->usec_timeout; k++) {
2470 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2471 					break;
2472 				udelay(1);
2473 			}
2474 			if (k == adev->usec_timeout) {
2475 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2476 						      0xffffffff, 0xffffffff);
2477 				mutex_unlock(&adev->grbm_idx_mutex);
2478 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2479 					 i, j);
2480 				return;
2481 			}
2482 		}
2483 	}
2484 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2485 	mutex_unlock(&adev->grbm_idx_mutex);
2486 
2487 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2488 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2489 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2490 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2491 	for (k = 0; k < adev->usec_timeout; k++) {
2492 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2493 			break;
2494 		udelay(1);
2495 	}
2496 }
2497 
2498 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2499 					       bool enable)
2500 {
2501 	u32 tmp;
2502 
2503 	/* These interrupts should be enabled to drive DS clock */
2504 
2505 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2506 
2507 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2508 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2509 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2510 	if(adev->gfx.num_gfx_rings)
2511 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2512 
2513 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2514 }
2515 
2516 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2517 {
2518 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2519 	/* csib */
2520 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2521 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2522 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2523 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2524 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2525 			adev->gfx.rlc.clear_state_size);
2526 }
2527 
/*
 * Walk the indirect part of an RLC register list format and collect two
 * tables from it.
 *
 * @register_list_format:         the list, as 32-bit words
 * @indirect_offset:              index of the first indirect word
 * @list_size:                    total number of words in the list
 * @unique_indirect_regs:         out: distinct indirect register values;
 *                                a zero entry marks a free slot
 * @unique_indirect_reg_count:    capacity of @unique_indirect_regs
 * @indirect_start_offsets:       out: start index of each indirect block
 * @indirect_start_offsets_count: in/out: number of entries stored so far
 * @max_start_offsets_count:      capacity of @indirect_start_offsets
 *
 * Each indirect block is a run of three-word entries terminated by
 * 0xFFFFFFFF; the third word of every entry is the value recorded in
 * @unique_indirect_regs.
 *
 * If the list holds more blocks than @indirect_start_offsets can take, the
 * condition is reported with WARN_ON() and parsing stops instead of
 * writing past the end of the caller's array.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* never store beyond the capacity the caller gave us */
		if (WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count))
			return;

		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words of the entry */
			indirect_offset += 2;

			/* look for the matching index, or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			/* claim the free slot for a value not seen before */
			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2564 
2565 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2566 {
2567 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2568 	int unique_indirect_reg_count = 0;
2569 
2570 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2571 	int indirect_start_offsets_count = 0;
2572 
2573 	int list_size = 0;
2574 	int i = 0, j = 0;
2575 	u32 tmp = 0;
2576 
2577 	u32 *register_list_format =
2578 		kmemdup(adev->gfx.rlc.register_list_format,
2579 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2580 	if (!register_list_format)
2581 		return -ENOMEM;
2582 
2583 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2584 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2585 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2586 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2587 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2588 				    unique_indirect_regs,
2589 				    unique_indirect_reg_count,
2590 				    indirect_start_offsets,
2591 				    &indirect_start_offsets_count,
2592 				    ARRAY_SIZE(indirect_start_offsets));
2593 
2594 	/* enable auto inc in case it is disabled */
2595 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2596 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2597 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2598 
2599 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2600 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2601 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2602 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2603 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2604 			adev->gfx.rlc.register_restore[i]);
2605 
2606 	/* load indirect register */
2607 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2608 		adev->gfx.rlc.reg_list_format_start);
2609 
2610 	/* direct register portion */
2611 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2612 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2613 			register_list_format[i]);
2614 
2615 	/* indirect register portion */
2616 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2617 		if (register_list_format[i] == 0xFFFFFFFF) {
2618 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2619 			continue;
2620 		}
2621 
2622 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2623 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2624 
2625 		for (j = 0; j < unique_indirect_reg_count; j++) {
2626 			if (register_list_format[i] == unique_indirect_regs[j]) {
2627 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2628 				break;
2629 			}
2630 		}
2631 
2632 		BUG_ON(j >= unique_indirect_reg_count);
2633 
2634 		i++;
2635 	}
2636 
2637 	/* set save/restore list size */
2638 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2639 	list_size = list_size >> 1;
2640 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2641 		adev->gfx.rlc.reg_restore_list_size);
2642 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2643 
2644 	/* write the starting offsets to RLC scratch ram */
2645 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2646 		adev->gfx.rlc.starting_offsets_start);
2647 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2648 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2649 		       indirect_start_offsets[i]);
2650 
2651 	/* load unique indirect regs*/
2652 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2653 		if (unique_indirect_regs[i] != 0) {
2654 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2655 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2656 			       unique_indirect_regs[i] & 0x3FFFF);
2657 
2658 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2659 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2660 			       unique_indirect_regs[i] >> 20);
2661 		}
2662 	}
2663 
2664 	kfree(register_list_format);
2665 	return 0;
2666 }
2667 
2668 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2669 {
2670 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2671 }
2672 
2673 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2674 					     bool enable)
2675 {
2676 	uint32_t data = 0;
2677 	uint32_t default_data = 0;
2678 
2679 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2680 	if (enable) {
2681 		/* enable GFXIP control over CGPG */
2682 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2683 		if(default_data != data)
2684 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2685 
2686 		/* update status */
2687 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2688 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2689 		if(default_data != data)
2690 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2691 	} else {
2692 		/* restore GFXIP control over GCPG */
2693 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2694 		if(default_data != data)
2695 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2696 	}
2697 }
2698 
2699 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2700 {
2701 	uint32_t data = 0;
2702 
2703 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2704 			      AMD_PG_SUPPORT_GFX_SMG |
2705 			      AMD_PG_SUPPORT_GFX_DMG)) {
2706 		/* init IDLE_POLL_COUNT = 60 */
2707 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2708 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2709 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2710 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2711 
2712 		/* init RLC PG Delay */
2713 		data = 0;
2714 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2715 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2716 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2717 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2718 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2719 
2720 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2721 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2722 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2723 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2724 
2725 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2726 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2727 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2728 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2729 
2730 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2731 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2732 
2733 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2734 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2735 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2736 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2737 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2738 	}
2739 }
2740 
2741 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2742 						bool enable)
2743 {
2744 	uint32_t data = 0;
2745 	uint32_t default_data = 0;
2746 
2747 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2748 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2749 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2750 			     enable ? 1 : 0);
2751 	if (default_data != data)
2752 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2753 }
2754 
2755 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2756 						bool enable)
2757 {
2758 	uint32_t data = 0;
2759 	uint32_t default_data = 0;
2760 
2761 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2762 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2763 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2764 			     enable ? 1 : 0);
2765 	if(default_data != data)
2766 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2767 }
2768 
2769 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2770 					bool enable)
2771 {
2772 	uint32_t data = 0;
2773 	uint32_t default_data = 0;
2774 
2775 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2776 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2777 			     CP_PG_DISABLE,
2778 			     enable ? 0 : 1);
2779 	if(default_data != data)
2780 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2781 }
2782 
2783 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2784 						bool enable)
2785 {
2786 	uint32_t data, default_data;
2787 
2788 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2789 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2790 			     GFX_POWER_GATING_ENABLE,
2791 			     enable ? 1 : 0);
2792 	if(default_data != data)
2793 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2794 }
2795 
2796 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2797 						bool enable)
2798 {
2799 	uint32_t data, default_data;
2800 
2801 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2802 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2803 			     GFX_PIPELINE_PG_ENABLE,
2804 			     enable ? 1 : 0);
2805 	if(default_data != data)
2806 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2807 
2808 	if (!enable)
2809 		/* read any GFX register to wake up GFX */
2810 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2811 }
2812 
2813 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2814 						       bool enable)
2815 {
2816 	uint32_t data, default_data;
2817 
2818 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2819 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2820 			     STATIC_PER_CU_PG_ENABLE,
2821 			     enable ? 1 : 0);
2822 	if(default_data != data)
2823 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2824 }
2825 
2826 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2827 						bool enable)
2828 {
2829 	uint32_t data, default_data;
2830 
2831 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2832 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2833 			     DYN_PER_CU_PG_ENABLE,
2834 			     enable ? 1 : 0);
2835 	if(default_data != data)
2836 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2837 }
2838 
2839 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2840 {
2841 	gfx_v9_0_init_csb(adev);
2842 
2843 	/*
2844 	 * Rlc save restore list is workable since v2_1.
2845 	 * And it's needed by gfxoff feature.
2846 	 */
2847 	if (adev->gfx.rlc.is_rlc_v2_1) {
2848 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2849 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2850 			gfx_v9_1_init_rlc_save_restore_list(adev);
2851 		gfx_v9_0_enable_save_restore_machine(adev);
2852 	}
2853 
2854 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2855 			      AMD_PG_SUPPORT_GFX_SMG |
2856 			      AMD_PG_SUPPORT_GFX_DMG |
2857 			      AMD_PG_SUPPORT_CP |
2858 			      AMD_PG_SUPPORT_GDS |
2859 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2860 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2861 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2862 		gfx_v9_0_init_gfx_power_gating(adev);
2863 	}
2864 }
2865 
2866 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2867 {
2868 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2869 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2870 	gfx_v9_0_wait_for_rlc_serdes(adev);
2871 }
2872 
2873 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2874 {
2875 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2876 	udelay(50);
2877 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2878 	udelay(50);
2879 }
2880 
2881 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2882 {
2883 #ifdef AMDGPU_RLC_DEBUG_RETRY
2884 	u32 rlc_ucode_ver;
2885 #endif
2886 
2887 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2888 	udelay(50);
2889 
2890 	/* carrizo do enable cp interrupt after cp inited */
2891 	if (!(adev->flags & AMD_IS_APU)) {
2892 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2893 		udelay(50);
2894 	}
2895 
2896 #ifdef AMDGPU_RLC_DEBUG_RETRY
2897 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2898 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2899 	if(rlc_ucode_ver == 0x108) {
2900 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2901 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2902 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2903 		 * default is 0x9C4 to create a 100us interval */
2904 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2905 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2906 		 * to disable the page fault retry interrupts, default is
2907 		 * 0x100 (256) */
2908 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2909 	}
2910 #endif
2911 }
2912 
2913 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2914 {
2915 	const struct rlc_firmware_header_v2_0 *hdr;
2916 	const __le32 *fw_data;
2917 	unsigned i, fw_size;
2918 
2919 	if (!adev->gfx.rlc_fw)
2920 		return -EINVAL;
2921 
2922 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2923 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2924 
2925 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2926 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2927 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2928 
2929 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2930 			RLCG_UCODE_LOADING_START_ADDRESS);
2931 	for (i = 0; i < fw_size; i++)
2932 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2933 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2934 
2935 	return 0;
2936 }
2937 
2938 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2939 {
2940 	int r;
2941 
2942 	if (amdgpu_sriov_vf(adev)) {
2943 		gfx_v9_0_init_csb(adev);
2944 		return 0;
2945 	}
2946 
2947 	adev->gfx.rlc.funcs->stop(adev);
2948 
2949 	/* disable CG */
2950 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2951 
2952 	gfx_v9_0_init_pg(adev);
2953 
2954 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2955 		/* legacy rlc firmware loading */
2956 		r = gfx_v9_0_rlc_load_microcode(adev);
2957 		if (r)
2958 			return r;
2959 	}
2960 
2961 	switch (adev->ip_versions[GC_HWIP][0]) {
2962 	case IP_VERSION(9, 2, 2):
2963 	case IP_VERSION(9, 1, 0):
2964 		if (amdgpu_lbpw == 0)
2965 			gfx_v9_0_enable_lbpw(adev, false);
2966 		else
2967 			gfx_v9_0_enable_lbpw(adev, true);
2968 		break;
2969 	case IP_VERSION(9, 4, 0):
2970 		if (amdgpu_lbpw > 0)
2971 			gfx_v9_0_enable_lbpw(adev, true);
2972 		else
2973 			gfx_v9_0_enable_lbpw(adev, false);
2974 		break;
2975 	default:
2976 		break;
2977 	}
2978 
2979 	adev->gfx.rlc.funcs->start(adev);
2980 
2981 	return 0;
2982 }
2983 
2984 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2985 {
2986 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2987 
2988 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2989 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2990 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2991 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2992 	udelay(50);
2993 }
2994 
2995 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2996 {
2997 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2998 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2999 	const struct gfx_firmware_header_v1_0 *me_hdr;
3000 	const __le32 *fw_data;
3001 	unsigned i, fw_size;
3002 
3003 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3004 		return -EINVAL;
3005 
3006 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3007 		adev->gfx.pfp_fw->data;
3008 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3009 		adev->gfx.ce_fw->data;
3010 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3011 		adev->gfx.me_fw->data;
3012 
3013 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3014 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3015 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3016 
3017 	gfx_v9_0_cp_gfx_enable(adev, false);
3018 
3019 	/* PFP */
3020 	fw_data = (const __le32 *)
3021 		(adev->gfx.pfp_fw->data +
3022 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3023 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3024 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3025 	for (i = 0; i < fw_size; i++)
3026 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3027 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3028 
3029 	/* CE */
3030 	fw_data = (const __le32 *)
3031 		(adev->gfx.ce_fw->data +
3032 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3033 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3034 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3035 	for (i = 0; i < fw_size; i++)
3036 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3037 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3038 
3039 	/* ME */
3040 	fw_data = (const __le32 *)
3041 		(adev->gfx.me_fw->data +
3042 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3043 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3044 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3045 	for (i = 0; i < fw_size; i++)
3046 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3047 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3048 
3049 	return 0;
3050 }
3051 
3052 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3053 {
3054 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3055 	const struct cs_section_def *sect = NULL;
3056 	const struct cs_extent_def *ext = NULL;
3057 	int r, i, tmp;
3058 
3059 	/* init the CP */
3060 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3061 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3062 
3063 	gfx_v9_0_cp_gfx_enable(adev, true);
3064 
3065 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3066 	if (r) {
3067 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068 		return r;
3069 	}
3070 
3071 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3072 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3073 
3074 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3075 	amdgpu_ring_write(ring, 0x80000000);
3076 	amdgpu_ring_write(ring, 0x80000000);
3077 
3078 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3079 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3080 			if (sect->id == SECT_CONTEXT) {
3081 				amdgpu_ring_write(ring,
3082 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3083 					       ext->reg_count));
3084 				amdgpu_ring_write(ring,
3085 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3086 				for (i = 0; i < ext->reg_count; i++)
3087 					amdgpu_ring_write(ring, ext->extent[i]);
3088 			}
3089 		}
3090 	}
3091 
3092 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3093 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3094 
3095 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3096 	amdgpu_ring_write(ring, 0);
3097 
3098 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3099 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3100 	amdgpu_ring_write(ring, 0x8000);
3101 	amdgpu_ring_write(ring, 0x8000);
3102 
3103 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3104 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3105 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3106 	amdgpu_ring_write(ring, tmp);
3107 	amdgpu_ring_write(ring, 0);
3108 
3109 	amdgpu_ring_commit(ring);
3110 
3111 	return 0;
3112 }
3113 
3114 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3115 {
3116 	struct amdgpu_ring *ring;
3117 	u32 tmp;
3118 	u32 rb_bufsz;
3119 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3120 
3121 	/* Set the write pointer delay */
3122 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3123 
3124 	/* set the RB to use vmid 0 */
3125 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3126 
3127 	/* Set ring buffer size */
3128 	ring = &adev->gfx.gfx_ring[0];
3129 	rb_bufsz = order_base_2(ring->ring_size / 8);
3130 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3131 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3132 #ifdef __BIG_ENDIAN
3133 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3134 #endif
3135 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3136 
3137 	/* Initialize the ring buffer's write pointers */
3138 	ring->wptr = 0;
3139 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3140 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3141 
3142 	/* set the wb address wether it's enabled or not */
3143 	rptr_addr = ring->rptr_gpu_addr;
3144 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3145 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3146 
3147 	wptr_gpu_addr = ring->wptr_gpu_addr;
3148 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3149 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3150 
3151 	mdelay(1);
3152 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3153 
3154 	rb_addr = ring->gpu_addr >> 8;
3155 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3156 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3157 
3158 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3159 	if (ring->use_doorbell) {
3160 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3161 				    DOORBELL_OFFSET, ring->doorbell_index);
3162 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3163 				    DOORBELL_EN, 1);
3164 	} else {
3165 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3166 	}
3167 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3168 
3169 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3170 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3171 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3172 
3173 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3174 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3175 
3176 
3177 	/* start the ring */
3178 	gfx_v9_0_cp_gfx_start(adev);
3179 	ring->sched.ready = true;
3180 
3181 	return 0;
3182 }
3183 
3184 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3185 {
3186 	if (enable) {
3187 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3188 	} else {
3189 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3190 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3191 		adev->gfx.kiq.ring.sched.ready = false;
3192 	}
3193 	udelay(50);
3194 }
3195 
3196 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3197 {
3198 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3199 	const __le32 *fw_data;
3200 	unsigned i;
3201 	u32 tmp;
3202 
3203 	if (!adev->gfx.mec_fw)
3204 		return -EINVAL;
3205 
3206 	gfx_v9_0_cp_compute_enable(adev, false);
3207 
3208 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3209 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3210 
3211 	fw_data = (const __le32 *)
3212 		(adev->gfx.mec_fw->data +
3213 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3214 	tmp = 0;
3215 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3216 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3217 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3218 
3219 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3220 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3221 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3222 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3223 
3224 	/* MEC1 */
3225 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3226 			 mec_hdr->jt_offset);
3227 	for (i = 0; i < mec_hdr->jt_size; i++)
3228 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3229 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3230 
3231 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3232 			adev->gfx.mec_fw_version);
3233 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3234 
3235 	return 0;
3236 }
3237 
3238 /* KIQ functions */
3239 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3240 {
3241 	uint32_t tmp;
3242 	struct amdgpu_device *adev = ring->adev;
3243 
3244 	/* tell RLC which is KIQ queue */
3245 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3246 	tmp &= 0xffffff00;
3247 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3248 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3249 	tmp |= 0x80;
3250 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3251 }
3252 
3253 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3254 {
3255 	struct amdgpu_device *adev = ring->adev;
3256 
3257 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3258 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3259 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3260 			mqd->cp_hqd_queue_priority =
3261 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3262 		}
3263 	}
3264 }
3265 
3266 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3267 {
3268 	struct amdgpu_device *adev = ring->adev;
3269 	struct v9_mqd *mqd = ring->mqd_ptr;
3270 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3271 	uint32_t tmp;
3272 
3273 	mqd->header = 0xC0310800;
3274 	mqd->compute_pipelinestat_enable = 0x00000001;
3275 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3276 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3277 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3278 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3279 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3280 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3281 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3282 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3283 	mqd->compute_misc_reserved = 0x00000003;
3284 
3285 	mqd->dynamic_cu_mask_addr_lo =
3286 		lower_32_bits(ring->mqd_gpu_addr
3287 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3288 	mqd->dynamic_cu_mask_addr_hi =
3289 		upper_32_bits(ring->mqd_gpu_addr
3290 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3291 
3292 	eop_base_addr = ring->eop_gpu_addr >> 8;
3293 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3294 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3295 
3296 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3297 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3298 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3299 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3300 
3301 	mqd->cp_hqd_eop_control = tmp;
3302 
3303 	/* enable doorbell? */
3304 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3305 
3306 	if (ring->use_doorbell) {
3307 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3308 				    DOORBELL_OFFSET, ring->doorbell_index);
3309 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3310 				    DOORBELL_EN, 1);
3311 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3312 				    DOORBELL_SOURCE, 0);
3313 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3314 				    DOORBELL_HIT, 0);
3315 	} else {
3316 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3317 					 DOORBELL_EN, 0);
3318 	}
3319 
3320 	mqd->cp_hqd_pq_doorbell_control = tmp;
3321 
3322 	/* disable the queue if it's active */
3323 	ring->wptr = 0;
3324 	mqd->cp_hqd_dequeue_request = 0;
3325 	mqd->cp_hqd_pq_rptr = 0;
3326 	mqd->cp_hqd_pq_wptr_lo = 0;
3327 	mqd->cp_hqd_pq_wptr_hi = 0;
3328 
3329 	/* set the pointer to the MQD */
3330 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3331 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3332 
3333 	/* set MQD vmid to 0 */
3334 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3335 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3336 	mqd->cp_mqd_control = tmp;
3337 
3338 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3339 	hqd_gpu_addr = ring->gpu_addr >> 8;
3340 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3341 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3342 
3343 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3344 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3345 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3346 			    (order_base_2(ring->ring_size / 4) - 1));
3347 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3348 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3349 #ifdef __BIG_ENDIAN
3350 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3351 #endif
3352 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3353 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3354 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3355 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3356 	mqd->cp_hqd_pq_control = tmp;
3357 
3358 	/* set the wb address whether it's enabled or not */
3359 	wb_gpu_addr = ring->rptr_gpu_addr;
3360 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3361 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3362 		upper_32_bits(wb_gpu_addr) & 0xffff;
3363 
3364 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3365 	wb_gpu_addr = ring->wptr_gpu_addr;
3366 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3367 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3368 
3369 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3370 	ring->wptr = 0;
3371 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3372 
3373 	/* set the vmid for the queue */
3374 	mqd->cp_hqd_vmid = 0;
3375 
3376 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3377 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3378 	mqd->cp_hqd_persistent_state = tmp;
3379 
3380 	/* set MIN_IB_AVAIL_SIZE */
3381 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3382 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3383 	mqd->cp_hqd_ib_control = tmp;
3384 
3385 	/* set static priority for a queue/ring */
3386 	gfx_v9_0_mqd_set_priority(ring, mqd);
3387 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3388 
3389 	/* map_queues packet doesn't need activate the queue,
3390 	 * so only kiq need set this field.
3391 	 */
3392 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3393 		mqd->cp_hqd_active = 1;
3394 
3395 	return 0;
3396 }
3397 
3398 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3399 {
3400 	struct amdgpu_device *adev = ring->adev;
3401 	struct v9_mqd *mqd = ring->mqd_ptr;
3402 	int j;
3403 
3404 	/* disable wptr polling */
3405 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3406 
3407 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3408 	       mqd->cp_hqd_eop_base_addr_lo);
3409 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3410 	       mqd->cp_hqd_eop_base_addr_hi);
3411 
3412 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3413 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3414 	       mqd->cp_hqd_eop_control);
3415 
3416 	/* enable doorbell? */
3417 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3418 	       mqd->cp_hqd_pq_doorbell_control);
3419 
3420 	/* disable the queue if it's active */
3421 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3422 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3423 		for (j = 0; j < adev->usec_timeout; j++) {
3424 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3425 				break;
3426 			udelay(1);
3427 		}
3428 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3429 		       mqd->cp_hqd_dequeue_request);
3430 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3431 		       mqd->cp_hqd_pq_rptr);
3432 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3433 		       mqd->cp_hqd_pq_wptr_lo);
3434 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3435 		       mqd->cp_hqd_pq_wptr_hi);
3436 	}
3437 
3438 	/* set the pointer to the MQD */
3439 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3440 	       mqd->cp_mqd_base_addr_lo);
3441 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3442 	       mqd->cp_mqd_base_addr_hi);
3443 
3444 	/* set MQD vmid to 0 */
3445 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3446 	       mqd->cp_mqd_control);
3447 
3448 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3449 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3450 	       mqd->cp_hqd_pq_base_lo);
3451 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3452 	       mqd->cp_hqd_pq_base_hi);
3453 
3454 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3455 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3456 	       mqd->cp_hqd_pq_control);
3457 
3458 	/* set the wb address whether it's enabled or not */
3459 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3460 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3462 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3463 
3464 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3465 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3466 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3468 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3469 
3470 	/* enable the doorbell if requested */
3471 	if (ring->use_doorbell) {
3472 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3473 					(adev->doorbell_index.kiq * 2) << 2);
3474 		/* If GC has entered CGPG, ringing doorbell > first page
3475 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3476 		 * workaround this issue. And this change has to align with firmware
3477 		 * update.
3478 		 */
3479 		if (check_if_enlarge_doorbell_range(adev))
3480 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3481 					(adev->doorbell.size - 4));
3482 		else
3483 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3484 					(adev->doorbell_index.userqueue_end * 2) << 2);
3485 	}
3486 
3487 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3488 	       mqd->cp_hqd_pq_doorbell_control);
3489 
3490 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3491 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3492 	       mqd->cp_hqd_pq_wptr_lo);
3493 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3494 	       mqd->cp_hqd_pq_wptr_hi);
3495 
3496 	/* set the vmid for the queue */
3497 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3498 
3499 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3500 	       mqd->cp_hqd_persistent_state);
3501 
3502 	/* activate the queue */
3503 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3504 	       mqd->cp_hqd_active);
3505 
3506 	if (ring->use_doorbell)
3507 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3508 
3509 	return 0;
3510 }
3511 
3512 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3513 {
3514 	struct amdgpu_device *adev = ring->adev;
3515 	int j;
3516 
3517 	/* disable the queue if it's active */
3518 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3519 
3520 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3521 
3522 		for (j = 0; j < adev->usec_timeout; j++) {
3523 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3524 				break;
3525 			udelay(1);
3526 		}
3527 
3528 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3529 			DRM_DEBUG("KIQ dequeue request failed.\n");
3530 
3531 			/* Manual disable if dequeue request times out */
3532 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3533 		}
3534 
3535 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3536 		      0);
3537 	}
3538 
3539 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3540 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3541 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3542 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3543 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3544 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3545 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3546 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3547 
3548 	return 0;
3549 }
3550 
3551 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3552 {
3553 	struct amdgpu_device *adev = ring->adev;
3554 	struct v9_mqd *mqd = ring->mqd_ptr;
3555 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3556 	struct v9_mqd *tmp_mqd;
3557 
3558 	gfx_v9_0_kiq_setting(ring);
3559 
3560 	/* GPU could be in bad state during probe, driver trigger the reset
3561 	 * after load the SMU, in this case , the mqd is not be initialized.
3562 	 * driver need to re-init the mqd.
3563 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3564 	 */
3565 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3566 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3567 		/* for GPU_RESET case , reset MQD to a clean status */
3568 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3569 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3570 
3571 		/* reset ring buffer */
3572 		ring->wptr = 0;
3573 		amdgpu_ring_clear_ring(ring);
3574 
3575 		mutex_lock(&adev->srbm_mutex);
3576 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3577 		gfx_v9_0_kiq_init_register(ring);
3578 		soc15_grbm_select(adev, 0, 0, 0, 0);
3579 		mutex_unlock(&adev->srbm_mutex);
3580 	} else {
3581 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3582 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3583 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3584 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3585 			amdgpu_ring_clear_ring(ring);
3586 		mutex_lock(&adev->srbm_mutex);
3587 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3588 		gfx_v9_0_mqd_init(ring);
3589 		gfx_v9_0_kiq_init_register(ring);
3590 		soc15_grbm_select(adev, 0, 0, 0, 0);
3591 		mutex_unlock(&adev->srbm_mutex);
3592 
3593 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3594 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3595 	}
3596 
3597 	return 0;
3598 }
3599 
3600 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3601 {
3602 	struct amdgpu_device *adev = ring->adev;
3603 	struct v9_mqd *mqd = ring->mqd_ptr;
3604 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3605 	struct v9_mqd *tmp_mqd;
3606 
3607 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3608 	 * is not be initialized before
3609 	 */
3610 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3611 
3612 	if (!tmp_mqd->cp_hqd_pq_control ||
3613 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3614 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3615 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3616 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3617 		mutex_lock(&adev->srbm_mutex);
3618 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3619 		gfx_v9_0_mqd_init(ring);
3620 		soc15_grbm_select(adev, 0, 0, 0, 0);
3621 		mutex_unlock(&adev->srbm_mutex);
3622 
3623 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3624 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3625 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3626 		/* reset MQD to a clean status */
3627 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3628 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3629 
3630 		/* reset ring buffer */
3631 		ring->wptr = 0;
3632 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3633 		amdgpu_ring_clear_ring(ring);
3634 	} else {
3635 		amdgpu_ring_clear_ring(ring);
3636 	}
3637 
3638 	return 0;
3639 }
3640 
3641 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3642 {
3643 	struct amdgpu_ring *ring;
3644 	int r;
3645 
3646 	ring = &adev->gfx.kiq.ring;
3647 
3648 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3649 	if (unlikely(r != 0))
3650 		return r;
3651 
3652 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3653 	if (unlikely(r != 0))
3654 		return r;
3655 
3656 	gfx_v9_0_kiq_init_queue(ring);
3657 	amdgpu_bo_kunmap(ring->mqd_obj);
3658 	ring->mqd_ptr = NULL;
3659 	amdgpu_bo_unreserve(ring->mqd_obj);
3660 	ring->sched.ready = true;
3661 	return 0;
3662 }
3663 
3664 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3665 {
3666 	struct amdgpu_ring *ring = NULL;
3667 	int r = 0, i;
3668 
3669 	gfx_v9_0_cp_compute_enable(adev, true);
3670 
3671 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3672 		ring = &adev->gfx.compute_ring[i];
3673 
3674 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3675 		if (unlikely(r != 0))
3676 			goto done;
3677 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3678 		if (!r) {
3679 			r = gfx_v9_0_kcq_init_queue(ring);
3680 			amdgpu_bo_kunmap(ring->mqd_obj);
3681 			ring->mqd_ptr = NULL;
3682 		}
3683 		amdgpu_bo_unreserve(ring->mqd_obj);
3684 		if (r)
3685 			goto done;
3686 	}
3687 
3688 	r = amdgpu_gfx_enable_kcq(adev);
3689 done:
3690 	return r;
3691 }
3692 
3693 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3694 {
3695 	int r, i;
3696 	struct amdgpu_ring *ring;
3697 
3698 	if (!(adev->flags & AMD_IS_APU))
3699 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3700 
3701 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3702 		if (adev->gfx.num_gfx_rings) {
3703 			/* legacy firmware loading */
3704 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3705 			if (r)
3706 				return r;
3707 		}
3708 
3709 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3710 		if (r)
3711 			return r;
3712 	}
3713 
3714 	r = gfx_v9_0_kiq_resume(adev);
3715 	if (r)
3716 		return r;
3717 
3718 	if (adev->gfx.num_gfx_rings) {
3719 		r = gfx_v9_0_cp_gfx_resume(adev);
3720 		if (r)
3721 			return r;
3722 	}
3723 
3724 	r = gfx_v9_0_kcq_resume(adev);
3725 	if (r)
3726 		return r;
3727 
3728 	if (adev->gfx.num_gfx_rings) {
3729 		ring = &adev->gfx.gfx_ring[0];
3730 		r = amdgpu_ring_test_helper(ring);
3731 		if (r)
3732 			return r;
3733 	}
3734 
3735 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3736 		ring = &adev->gfx.compute_ring[i];
3737 		amdgpu_ring_test_helper(ring);
3738 	}
3739 
3740 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3741 
3742 	return 0;
3743 }
3744 
3745 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3746 {
3747 	u32 tmp;
3748 
3749 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3750 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3751 		return;
3752 
3753 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3754 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3755 				adev->df.hash_status.hash_64k);
3756 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3757 				adev->df.hash_status.hash_2m);
3758 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3759 				adev->df.hash_status.hash_1g);
3760 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3761 }
3762 
3763 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3764 {
3765 	if (adev->gfx.num_gfx_rings)
3766 		gfx_v9_0_cp_gfx_enable(adev, enable);
3767 	gfx_v9_0_cp_compute_enable(adev, enable);
3768 }
3769 
3770 static int gfx_v9_0_hw_init(void *handle)
3771 {
3772 	int r;
3773 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3774 
3775 	if (!amdgpu_sriov_vf(adev))
3776 		gfx_v9_0_init_golden_registers(adev);
3777 
3778 	gfx_v9_0_constants_init(adev);
3779 
3780 	gfx_v9_0_init_tcp_config(adev);
3781 
3782 	r = adev->gfx.rlc.funcs->resume(adev);
3783 	if (r)
3784 		return r;
3785 
3786 	r = gfx_v9_0_cp_resume(adev);
3787 	if (r)
3788 		return r;
3789 
3790 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3791 		gfx_v9_4_2_set_power_brake_sequence(adev);
3792 
3793 	return r;
3794 }
3795 
3796 static int gfx_v9_0_hw_fini(void *handle)
3797 {
3798 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3799 
3800 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3801 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3802 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3803 
3804 	/* DF freeze and kcq disable will fail */
3805 	if (!amdgpu_ras_intr_triggered())
3806 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3807 		amdgpu_gfx_disable_kcq(adev);
3808 
3809 	if (amdgpu_sriov_vf(adev)) {
3810 		gfx_v9_0_cp_gfx_enable(adev, false);
3811 		/* must disable polling for SRIOV when hw finished, otherwise
3812 		 * CPC engine may still keep fetching WB address which is already
3813 		 * invalid after sw finished and trigger DMAR reading error in
3814 		 * hypervisor side.
3815 		 */
3816 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3817 		return 0;
3818 	}
3819 
3820 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3821 	 * otherwise KIQ is hanging when binding back
3822 	 */
3823 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3824 		mutex_lock(&adev->srbm_mutex);
3825 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3826 				adev->gfx.kiq.ring.pipe,
3827 				adev->gfx.kiq.ring.queue, 0);
3828 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3829 		soc15_grbm_select(adev, 0, 0, 0, 0);
3830 		mutex_unlock(&adev->srbm_mutex);
3831 	}
3832 
3833 	gfx_v9_0_cp_enable(adev, false);
3834 
3835 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3836 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3837 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3838 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3839 		return 0;
3840 	}
3841 
3842 	adev->gfx.rlc.funcs->stop(adev);
3843 	return 0;
3844 }
3845 
/* Suspend callback: suspending is the same as the hardware teardown. */
static int gfx_v9_0_suspend(void *handle)
{
	int ret = gfx_v9_0_hw_fini(handle);

	return ret;
}
3850 
/* Resume callback: resuming is the same as the hardware init. */
static int gfx_v9_0_resume(void *handle)
{
	int ret = gfx_v9_0_hw_init(handle);

	return ret;
}
3855 
3856 static bool gfx_v9_0_is_idle(void *handle)
3857 {
3858 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3859 
3860 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3861 				GRBM_STATUS, GUI_ACTIVE))
3862 		return false;
3863 	else
3864 		return true;
3865 }
3866 
3867 static int gfx_v9_0_wait_for_idle(void *handle)
3868 {
3869 	unsigned i;
3870 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3871 
3872 	for (i = 0; i < adev->usec_timeout; i++) {
3873 		if (gfx_v9_0_is_idle(handle))
3874 			return 0;
3875 		udelay(1);
3876 	}
3877 	return -ETIMEDOUT;
3878 }
3879 
3880 static int gfx_v9_0_soft_reset(void *handle)
3881 {
3882 	u32 grbm_soft_reset = 0;
3883 	u32 tmp;
3884 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3885 
3886 	/* GRBM_STATUS */
3887 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3888 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3889 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3890 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3891 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3892 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3893 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3894 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3895 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3896 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3897 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3898 	}
3899 
3900 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3901 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3902 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3903 	}
3904 
3905 	/* GRBM_STATUS2 */
3906 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3907 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3908 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3909 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3910 
3911 
3912 	if (grbm_soft_reset) {
3913 		/* stop the rlc */
3914 		adev->gfx.rlc.funcs->stop(adev);
3915 
3916 		if (adev->gfx.num_gfx_rings)
3917 			/* Disable GFX parsing/prefetching */
3918 			gfx_v9_0_cp_gfx_enable(adev, false);
3919 
3920 		/* Disable MEC parsing/prefetching */
3921 		gfx_v9_0_cp_compute_enable(adev, false);
3922 
3923 		if (grbm_soft_reset) {
3924 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3925 			tmp |= grbm_soft_reset;
3926 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3927 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3928 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3929 
3930 			udelay(50);
3931 
3932 			tmp &= ~grbm_soft_reset;
3933 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3934 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3935 		}
3936 
3937 		/* Wait a little for things to settle down */
3938 		udelay(50);
3939 	}
3940 	return 0;
3941 }
3942 
3943 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3944 {
3945 	signed long r, cnt = 0;
3946 	unsigned long flags;
3947 	uint32_t seq, reg_val_offs = 0;
3948 	uint64_t value = 0;
3949 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3950 	struct amdgpu_ring *ring = &kiq->ring;
3951 
3952 	BUG_ON(!ring->funcs->emit_rreg);
3953 
3954 	spin_lock_irqsave(&kiq->ring_lock, flags);
3955 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3956 		pr_err("critical bug! too many kiq readers\n");
3957 		goto failed_unlock;
3958 	}
3959 	amdgpu_ring_alloc(ring, 32);
3960 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3961 	amdgpu_ring_write(ring, 9 |	/* src: register*/
3962 				(5 << 8) |	/* dst: memory */
3963 				(1 << 16) |	/* count sel */
3964 				(1 << 20));	/* write confirm */
3965 	amdgpu_ring_write(ring, 0);
3966 	amdgpu_ring_write(ring, 0);
3967 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3968 				reg_val_offs * 4));
3969 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3970 				reg_val_offs * 4));
3971 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3972 	if (r)
3973 		goto failed_undo;
3974 
3975 	amdgpu_ring_commit(ring);
3976 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3977 
3978 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3979 
3980 	/* don't wait anymore for gpu reset case because this way may
3981 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
3982 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
3983 	 * never return if we keep waiting in virt_kiq_rreg, which cause
3984 	 * gpu_recover() hang there.
3985 	 *
3986 	 * also don't wait anymore for IRQ context
3987 	 * */
3988 	if (r < 1 && (amdgpu_in_reset(adev)))
3989 		goto failed_kiq_read;
3990 
3991 	might_sleep();
3992 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3993 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3994 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3995 	}
3996 
3997 	if (cnt > MAX_KIQ_REG_TRY)
3998 		goto failed_kiq_read;
3999 
4000 	mb();
4001 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4002 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4003 	amdgpu_device_wb_free(adev, reg_val_offs);
4004 	return value;
4005 
4006 failed_undo:
4007 	amdgpu_ring_undo(ring);
4008 failed_unlock:
4009 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4010 failed_kiq_read:
4011 	if (reg_val_offs)
4012 		amdgpu_device_wb_free(adev, reg_val_offs);
4013 	pr_err("failed to read gpu clock\n");
4014 	return ~0;
4015 }
4016 
4017 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4018 {
4019 	uint64_t clock, clock_lo, clock_hi, hi_check;
4020 
4021 	switch (adev->ip_versions[GC_HWIP][0]) {
4022 	case IP_VERSION(9, 3, 0):
4023 		preempt_disable();
4024 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4025 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4026 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4027 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4028 		 * roughly every 42 seconds.
4029 		 */
4030 		if (hi_check != clock_hi) {
4031 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4032 			clock_hi = hi_check;
4033 		}
4034 		preempt_enable();
4035 		clock = clock_lo | (clock_hi << 32ULL);
4036 		break;
4037 	default:
4038 		amdgpu_gfx_off_ctrl(adev, false);
4039 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4040 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4041 			clock = gfx_v9_0_kiq_read_clock(adev);
4042 		} else {
4043 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4044 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4045 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4046 		}
4047 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4048 		amdgpu_gfx_off_ctrl(adev, true);
4049 		break;
4050 	}
4051 	return clock;
4052 }
4053 
4054 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4055 					  uint32_t vmid,
4056 					  uint32_t gds_base, uint32_t gds_size,
4057 					  uint32_t gws_base, uint32_t gws_size,
4058 					  uint32_t oa_base, uint32_t oa_size)
4059 {
4060 	struct amdgpu_device *adev = ring->adev;
4061 
4062 	/* GDS Base */
4063 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4064 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4065 				   gds_base);
4066 
4067 	/* GDS Size */
4068 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4069 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4070 				   gds_size);
4071 
4072 	/* GWS */
4073 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4074 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4075 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4076 
4077 	/* OA */
4078 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4079 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4080 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4081 }
4082 
4083 static const u32 vgpr_init_compute_shader[] =
4084 {
4085 	0xb07c0000, 0xbe8000ff,
4086 	0x000000f8, 0xbf110800,
4087 	0x7e000280, 0x7e020280,
4088 	0x7e040280, 0x7e060280,
4089 	0x7e080280, 0x7e0a0280,
4090 	0x7e0c0280, 0x7e0e0280,
4091 	0x80808800, 0xbe803200,
4092 	0xbf84fff5, 0xbf9c0000,
4093 	0xd28c0001, 0x0001007f,
4094 	0xd28d0001, 0x0002027e,
4095 	0x10020288, 0xb8810904,
4096 	0xb7814000, 0xd1196a01,
4097 	0x00000301, 0xbe800087,
4098 	0xbefc00c1, 0xd89c4000,
4099 	0x00020201, 0xd89cc080,
4100 	0x00040401, 0x320202ff,
4101 	0x00000800, 0x80808100,
4102 	0xbf84fff8, 0x7e020280,
4103 	0xbf810000, 0x00000000,
4104 };
4105 
4106 static const u32 sgpr_init_compute_shader[] =
4107 {
4108 	0xb07c0000, 0xbe8000ff,
4109 	0x0000005f, 0xbee50080,
4110 	0xbe812c65, 0xbe822c65,
4111 	0xbe832c65, 0xbe842c65,
4112 	0xbe852c65, 0xb77c0005,
4113 	0x80808500, 0xbf84fff8,
4114 	0xbe800080, 0xbf810000,
4115 };
4116 
/*
 * Precompiled compute shader image used for the VGPR initialization pass when
 * the GC IP version is 9.4.1.
 *
 * gfx_v9_0_do_edc_gpr_workarounds() selects this table instead of
 * vgpr_init_compute_shader for that IP version, copies it verbatim into its
 * indirect buffer and dispatches it together with vgpr_init_regs_arcturus.
 *
 * The image starts with 256 two-dword entries that differ only in the low
 * byte of the first dword (0x00..0xff), followed by a shorter tail.
 *
 * NOTE(review): the dwords are presumably raw shader machine code; they must
 * not be edited by hand - confirm against the shader source before changing
 * anything here.
 */
static const u32 vgpr_init_compute_shader_arcturus[] = {
	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
	0xbf84fff8, 0xbf810000,
};
4211 
/*
 * When the register arrays below change, please update gpr_reg_size and
 * sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so that
 * they still cover all gfx9 ASICs.
 */
4215 static const struct soc15_reg_entry vgpr_init_regs[] = {
4216    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4217    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4218    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4219    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4220    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4221    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4222    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4223    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4224    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4225    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4226    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4227    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4228    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4229    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4230 };
4231 
4232 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4233    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4234    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4235    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4236    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4237    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4238    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4239    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4240    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4241    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4242    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4243    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4244    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4245    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4246    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4247 };
4248 
4249 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4250    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4251    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4252    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4253    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4254    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4255    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4256    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4257    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4258    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4259    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4260    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4261    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4262    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4263    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4264 };
4265 
4266 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4267    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4268    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4269    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4270    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4271    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4272    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4273    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4274    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4275    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4276    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4277    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4278    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4279    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4280    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4281 };
4282 
4283 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4284    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4285    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4286    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4287    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4288    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4289    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4290    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4291    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4292    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4293    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4294    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4295    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4296    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4297    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4298    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4299    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4300    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4301    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4302    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4303    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4304    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4305    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4306    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4307    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4308    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4309    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4310    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4311    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4312    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4313    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4314    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4315    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4316    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4317 };
4318 
4319 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4320 {
4321 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4322 	int i, r;
4323 
4324 	/* only support when RAS is enabled */
4325 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4326 		return 0;
4327 
4328 	r = amdgpu_ring_alloc(ring, 7);
4329 	if (r) {
4330 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4331 			ring->name, r);
4332 		return r;
4333 	}
4334 
4335 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4336 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4337 
4338 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4339 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4340 				PACKET3_DMA_DATA_DST_SEL(1) |
4341 				PACKET3_DMA_DATA_SRC_SEL(2) |
4342 				PACKET3_DMA_DATA_ENGINE(0)));
4343 	amdgpu_ring_write(ring, 0);
4344 	amdgpu_ring_write(ring, 0);
4345 	amdgpu_ring_write(ring, 0);
4346 	amdgpu_ring_write(ring, 0);
4347 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4348 				adev->gds.gds_size);
4349 
4350 	amdgpu_ring_commit(ring);
4351 
4352 	for (i = 0; i < adev->usec_timeout; i++) {
4353 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4354 			break;
4355 		udelay(1);
4356 	}
4357 
4358 	if (i >= adev->usec_timeout)
4359 		r = -ETIMEDOUT;
4360 
4361 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4362 
4363 	return r;
4364 }
4365 
4366 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4367 {
4368 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4369 	struct amdgpu_ib ib;
4370 	struct dma_fence *f = NULL;
4371 	int r, i;
4372 	unsigned total_size, vgpr_offset, sgpr_offset;
4373 	u64 gpu_addr;
4374 
4375 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4376 						adev->gfx.config.max_cu_per_sh *
4377 						adev->gfx.config.max_sh_per_se;
4378 	int sgpr_work_group_size = 5;
4379 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4380 	int vgpr_init_shader_size;
4381 	const u32 *vgpr_init_shader_ptr;
4382 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4383 
4384 	/* only support when RAS is enabled */
4385 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4386 		return 0;
4387 
4388 	/* bail if the compute ring is not ready */
4389 	if (!ring->sched.ready)
4390 		return 0;
4391 
4392 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4393 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4394 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4395 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4396 	} else {
4397 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4398 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4399 		vgpr_init_regs_ptr = vgpr_init_regs;
4400 	}
4401 
4402 	total_size =
4403 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4404 	total_size +=
4405 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4406 	total_size +=
4407 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4408 	total_size = ALIGN(total_size, 256);
4409 	vgpr_offset = total_size;
4410 	total_size += ALIGN(vgpr_init_shader_size, 256);
4411 	sgpr_offset = total_size;
4412 	total_size += sizeof(sgpr_init_compute_shader);
4413 
4414 	/* allocate an indirect buffer to put the commands in */
4415 	memset(&ib, 0, sizeof(ib));
4416 	r = amdgpu_ib_get(adev, NULL, total_size,
4417 					AMDGPU_IB_POOL_DIRECT, &ib);
4418 	if (r) {
4419 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4420 		return r;
4421 	}
4422 
4423 	/* load the compute shaders */
4424 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4425 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4426 
4427 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4428 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4429 
4430 	/* init the ib length to 0 */
4431 	ib.length_dw = 0;
4432 
4433 	/* VGPR */
4434 	/* write the register state for the compute dispatch */
4435 	for (i = 0; i < gpr_reg_size; i++) {
4436 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4437 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4438 								- PACKET3_SET_SH_REG_START;
4439 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4440 	}
4441 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4442 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4443 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4444 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4445 							- PACKET3_SET_SH_REG_START;
4446 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4447 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4448 
4449 	/* write dispatch packet */
4450 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4451 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4452 	ib.ptr[ib.length_dw++] = 1; /* y */
4453 	ib.ptr[ib.length_dw++] = 1; /* z */
4454 	ib.ptr[ib.length_dw++] =
4455 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4456 
4457 	/* write CS partial flush packet */
4458 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4459 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4460 
4461 	/* SGPR1 */
4462 	/* write the register state for the compute dispatch */
4463 	for (i = 0; i < gpr_reg_size; i++) {
4464 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4465 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4466 								- PACKET3_SET_SH_REG_START;
4467 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4468 	}
4469 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4470 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4471 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4472 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4473 							- PACKET3_SET_SH_REG_START;
4474 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4475 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4476 
4477 	/* write dispatch packet */
4478 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4479 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4480 	ib.ptr[ib.length_dw++] = 1; /* y */
4481 	ib.ptr[ib.length_dw++] = 1; /* z */
4482 	ib.ptr[ib.length_dw++] =
4483 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4484 
4485 	/* write CS partial flush packet */
4486 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4487 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4488 
4489 	/* SGPR2 */
4490 	/* write the register state for the compute dispatch */
4491 	for (i = 0; i < gpr_reg_size; i++) {
4492 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4493 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4494 								- PACKET3_SET_SH_REG_START;
4495 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4496 	}
4497 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4498 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4499 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4500 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4501 							- PACKET3_SET_SH_REG_START;
4502 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4503 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4504 
4505 	/* write dispatch packet */
4506 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4507 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4508 	ib.ptr[ib.length_dw++] = 1; /* y */
4509 	ib.ptr[ib.length_dw++] = 1; /* z */
4510 	ib.ptr[ib.length_dw++] =
4511 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4512 
4513 	/* write CS partial flush packet */
4514 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4515 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4516 
4517 	/* shedule the ib on the ring */
4518 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4519 	if (r) {
4520 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4521 		goto fail;
4522 	}
4523 
4524 	/* wait for the GPU to finish processing the IB */
4525 	r = dma_fence_wait(f, false);
4526 	if (r) {
4527 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4528 		goto fail;
4529 	}
4530 
4531 fail:
4532 	amdgpu_ib_free(adev, &ib, NULL);
4533 	dma_fence_put(f);
4534 
4535 	return r;
4536 }
4537 
4538 static int gfx_v9_0_early_init(void *handle)
4539 {
4540 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4541 
4542 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4543 
4544 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4545 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4546 		adev->gfx.num_gfx_rings = 0;
4547 	else
4548 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4549 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4550 					  AMDGPU_MAX_COMPUTE_RINGS);
4551 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4552 	gfx_v9_0_set_ring_funcs(adev);
4553 	gfx_v9_0_set_irq_funcs(adev);
4554 	gfx_v9_0_set_gds_init(adev);
4555 	gfx_v9_0_set_rlc_funcs(adev);
4556 
4557 	/* init rlcg reg access ctrl */
4558 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4559 
4560 	return 0;
4561 }
4562 
4563 static int gfx_v9_0_ecc_late_init(void *handle)
4564 {
4565 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4566 	int r;
4567 
4568 	/*
4569 	 * Temp workaround to fix the issue that CP firmware fails to
4570 	 * update read pointer when CPDMA is writing clearing operation
4571 	 * to GDS in suspend/resume sequence on several cards. So just
4572 	 * limit this operation in cold boot sequence.
4573 	 */
4574 	if ((!adev->in_suspend) &&
4575 	    (adev->gds.gds_size)) {
4576 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4577 		if (r)
4578 			return r;
4579 	}
4580 
4581 	/* requires IBs so do in late init after IB pool is initialized */
4582 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4583 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4584 	else
4585 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4586 
4587 	if (r)
4588 		return r;
4589 
4590 	if (adev->gfx.ras &&
4591 	    adev->gfx.ras->enable_watchdog_timer)
4592 		adev->gfx.ras->enable_watchdog_timer(adev);
4593 
4594 	return 0;
4595 }
4596 
4597 static int gfx_v9_0_late_init(void *handle)
4598 {
4599 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4600 	int r;
4601 
4602 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4603 	if (r)
4604 		return r;
4605 
4606 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4607 	if (r)
4608 		return r;
4609 
4610 	r = gfx_v9_0_ecc_late_init(handle);
4611 	if (r)
4612 		return r;
4613 
4614 	return 0;
4615 }
4616 
4617 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4618 {
4619 	uint32_t rlc_setting;
4620 
4621 	/* if RLC is not enabled, do nothing */
4622 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4623 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4624 		return false;
4625 
4626 	return true;
4627 }
4628 
4629 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4630 {
4631 	uint32_t data;
4632 	unsigned i;
4633 
4634 	data = RLC_SAFE_MODE__CMD_MASK;
4635 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4636 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4637 
4638 	/* wait for RLC_SAFE_MODE */
4639 	for (i = 0; i < adev->usec_timeout; i++) {
4640 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4641 			break;
4642 		udelay(1);
4643 	}
4644 }
4645 
4646 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4647 {
4648 	uint32_t data;
4649 
4650 	data = RLC_SAFE_MODE__CMD_MASK;
4651 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4652 }
4653 
4654 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4655 						bool enable)
4656 {
4657 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4658 
4659 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4660 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4661 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4662 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4663 	} else {
4664 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4665 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4666 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4667 	}
4668 
4669 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4670 }
4671 
4672 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4673 						bool enable)
4674 {
4675 	/* TODO: double check if we need to perform under safe mode */
4676 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4677 
4678 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4679 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4680 	else
4681 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4682 
4683 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4684 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4685 	else
4686 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4687 
4688 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4689 }
4690 
4691 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4692 						      bool enable)
4693 {
4694 	uint32_t data, def;
4695 
4696 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4697 
4698 	/* It is disabled by HW by default */
4699 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4700 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4701 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4702 
4703 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4704 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4705 
4706 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4707 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4708 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4709 
4710 		/* only for Vega10 & Raven1 */
4711 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4712 
4713 		if (def != data)
4714 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4715 
4716 		/* MGLS is a global flag to control all MGLS in GFX */
4717 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4718 			/* 2 - RLC memory Light sleep */
4719 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4720 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4721 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4722 				if (def != data)
4723 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4724 			}
4725 			/* 3 - CP memory Light sleep */
4726 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4727 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4728 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4729 				if (def != data)
4730 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4731 			}
4732 		}
4733 	} else {
4734 		/* 1 - MGCG_OVERRIDE */
4735 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4736 
4737 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4738 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4739 
4740 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4741 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4742 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4743 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4744 
4745 		if (def != data)
4746 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4747 
4748 		/* 2 - disable MGLS in RLC */
4749 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4750 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4751 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4752 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4753 		}
4754 
4755 		/* 3 - disable MGLS in CP */
4756 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4757 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4758 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4759 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4760 		}
4761 	}
4762 
4763 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4764 }
4765 
4766 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4767 					   bool enable)
4768 {
4769 	uint32_t data, def;
4770 
4771 	if (!adev->gfx.num_gfx_rings)
4772 		return;
4773 
4774 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4775 
4776 	/* Enable 3D CGCG/CGLS */
4777 	if (enable) {
4778 		/* write cmd to clear cgcg/cgls ov */
4779 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4780 		/* unset CGCG override */
4781 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4782 		/* update CGCG and CGLS override bits */
4783 		if (def != data)
4784 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4785 
4786 		/* enable 3Dcgcg FSM(0x0000363f) */
4787 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4788 
4789 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4790 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4791 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4792 		else
4793 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4794 
4795 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4796 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4797 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4798 		if (def != data)
4799 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4800 
4801 		/* set IDLE_POLL_COUNT(0x00900100) */
4802 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4803 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4804 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4805 		if (def != data)
4806 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4807 	} else {
4808 		/* Disable CGCG/CGLS */
4809 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4810 		/* disable cgcg, cgls should be disabled */
4811 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4812 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4813 		/* disable cgcg and cgls in FSM */
4814 		if (def != data)
4815 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4816 	}
4817 
4818 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4819 }
4820 
4821 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4822 						      bool enable)
4823 {
4824 	uint32_t def, data;
4825 
4826 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4827 
4828 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4829 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4830 		/* unset CGCG override */
4831 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4832 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4833 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4834 		else
4835 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4836 		/* update CGCG and CGLS override bits */
4837 		if (def != data)
4838 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4839 
4840 		/* enable cgcg FSM(0x0000363F) */
4841 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4842 
4843 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4844 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4845 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4846 		else
4847 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4848 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4849 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4850 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4851 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4852 		if (def != data)
4853 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4854 
4855 		/* set IDLE_POLL_COUNT(0x00900100) */
4856 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4857 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4858 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4859 		if (def != data)
4860 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4861 	} else {
4862 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4863 		/* reset CGCG/CGLS bits */
4864 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4865 		/* disable cgcg and cgls in FSM */
4866 		if (def != data)
4867 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4868 	}
4869 
4870 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4871 }
4872 
4873 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4874 					    bool enable)
4875 {
4876 	if (enable) {
4877 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4878 		 * ===  MGCG + MGLS ===
4879 		 */
4880 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4881 		/* ===  CGCG /CGLS for GFX 3D Only === */
4882 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4883 		/* ===  CGCG + CGLS === */
4884 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4885 	} else {
4886 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4887 		 * ===  CGCG + CGLS ===
4888 		 */
4889 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4890 		/* ===  CGCG /CGLS for GFX 3D Only === */
4891 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4892 		/* ===  MGCG + MGLS === */
4893 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4894 	}
4895 	return 0;
4896 }
4897 
4898 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4899 {
4900 	u32 reg, data;
4901 
4902 	amdgpu_gfx_off_ctrl(adev, false);
4903 
4904 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4905 	if (amdgpu_sriov_is_pp_one_vf(adev))
4906 		data = RREG32_NO_KIQ(reg);
4907 	else
4908 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4909 
4910 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4911 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4912 
4913 	if (amdgpu_sriov_is_pp_one_vf(adev))
4914 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4915 	else
4916 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4917 
4918 	amdgpu_gfx_off_ctrl(adev, true);
4919 }
4920 
4921 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4922 					uint32_t offset,
4923 					struct soc15_reg_rlcg *entries, int arr_size)
4924 {
4925 	int i;
4926 	uint32_t reg;
4927 
4928 	if (!entries)
4929 		return false;
4930 
4931 	for (i = 0; i < arr_size; i++) {
4932 		const struct soc15_reg_rlcg *entry;
4933 
4934 		entry = &entries[i];
4935 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4936 		if (offset == reg)
4937 			return true;
4938 	}
4939 
4940 	return false;
4941 }
4942 
4943 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4944 {
4945 	return gfx_v9_0_check_rlcg_range(adev, offset,
4946 					(void *)rlcg_access_gc_9_0,
4947 					ARRAY_SIZE(rlcg_access_gc_9_0));
4948 }
4949 
4950 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4951 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4952 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4953 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4954 	.init = gfx_v9_0_rlc_init,
4955 	.get_csb_size = gfx_v9_0_get_csb_size,
4956 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4957 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4958 	.resume = gfx_v9_0_rlc_resume,
4959 	.stop = gfx_v9_0_rlc_stop,
4960 	.reset = gfx_v9_0_rlc_reset,
4961 	.start = gfx_v9_0_rlc_start,
4962 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4963 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4964 };
4965 
4966 static int gfx_v9_0_set_powergating_state(void *handle,
4967 					  enum amd_powergating_state state)
4968 {
4969 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4970 	bool enable = (state == AMD_PG_STATE_GATE);
4971 
4972 	switch (adev->ip_versions[GC_HWIP][0]) {
4973 	case IP_VERSION(9, 2, 2):
4974 	case IP_VERSION(9, 1, 0):
4975 	case IP_VERSION(9, 3, 0):
4976 		if (!enable)
4977 			amdgpu_gfx_off_ctrl(adev, false);
4978 
4979 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4980 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4981 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4982 		} else {
4983 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4984 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4985 		}
4986 
4987 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4988 			gfx_v9_0_enable_cp_power_gating(adev, true);
4989 		else
4990 			gfx_v9_0_enable_cp_power_gating(adev, false);
4991 
4992 		/* update gfx cgpg state */
4993 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4994 
4995 		/* update mgcg state */
4996 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4997 
4998 		if (enable)
4999 			amdgpu_gfx_off_ctrl(adev, true);
5000 		break;
5001 	case IP_VERSION(9, 2, 1):
5002 		amdgpu_gfx_off_ctrl(adev, enable);
5003 		break;
5004 	default:
5005 		break;
5006 	}
5007 
5008 	return 0;
5009 }
5010 
5011 static int gfx_v9_0_set_clockgating_state(void *handle,
5012 					  enum amd_clockgating_state state)
5013 {
5014 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5015 
5016 	if (amdgpu_sriov_vf(adev))
5017 		return 0;
5018 
5019 	switch (adev->ip_versions[GC_HWIP][0]) {
5020 	case IP_VERSION(9, 0, 1):
5021 	case IP_VERSION(9, 2, 1):
5022 	case IP_VERSION(9, 4, 0):
5023 	case IP_VERSION(9, 2, 2):
5024 	case IP_VERSION(9, 1, 0):
5025 	case IP_VERSION(9, 4, 1):
5026 	case IP_VERSION(9, 3, 0):
5027 	case IP_VERSION(9, 4, 2):
5028 		gfx_v9_0_update_gfx_clock_gating(adev,
5029 						 state == AMD_CG_STATE_GATE);
5030 		break;
5031 	default:
5032 		break;
5033 	}
5034 	return 0;
5035 }
5036 
5037 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5038 {
5039 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5040 	int data;
5041 
5042 	if (amdgpu_sriov_vf(adev))
5043 		*flags = 0;
5044 
5045 	/* AMD_CG_SUPPORT_GFX_MGCG */
5046 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5047 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5048 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5049 
5050 	/* AMD_CG_SUPPORT_GFX_CGCG */
5051 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5052 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5053 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5054 
5055 	/* AMD_CG_SUPPORT_GFX_CGLS */
5056 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5057 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5058 
5059 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5060 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5061 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5062 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5063 
5064 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5065 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5066 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5067 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5068 
5069 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5070 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5071 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5072 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5073 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5074 
5075 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5076 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5077 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5078 	}
5079 }
5080 
5081 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5082 {
5083 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5084 }
5085 
5086 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5087 {
5088 	struct amdgpu_device *adev = ring->adev;
5089 	u64 wptr;
5090 
5091 	/* XXX check if swapping is necessary on BE */
5092 	if (ring->use_doorbell) {
5093 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5094 	} else {
5095 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5096 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5097 	}
5098 
5099 	return wptr;
5100 }
5101 
5102 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5103 {
5104 	struct amdgpu_device *adev = ring->adev;
5105 
5106 	if (ring->use_doorbell) {
5107 		/* XXX check if swapping is necessary on BE */
5108 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5109 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5110 	} else {
5111 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5112 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5113 	}
5114 }
5115 
5116 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5117 {
5118 	struct amdgpu_device *adev = ring->adev;
5119 	u32 ref_and_mask, reg_mem_engine;
5120 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5121 
5122 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5123 		switch (ring->me) {
5124 		case 1:
5125 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5126 			break;
5127 		case 2:
5128 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5129 			break;
5130 		default:
5131 			return;
5132 		}
5133 		reg_mem_engine = 0;
5134 	} else {
5135 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5136 		reg_mem_engine = 1; /* pfp */
5137 	}
5138 
5139 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5140 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5141 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5142 			      ref_and_mask, ref_and_mask, 0x20);
5143 }
5144 
5145 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5146 					struct amdgpu_job *job,
5147 					struct amdgpu_ib *ib,
5148 					uint32_t flags)
5149 {
5150 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5151 	u32 header, control = 0;
5152 
5153 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5154 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5155 	else
5156 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5157 
5158 	control |= ib->length_dw | (vmid << 24);
5159 
5160 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5161 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5162 
5163 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5164 			gfx_v9_0_ring_emit_de_meta(ring);
5165 	}
5166 
5167 	amdgpu_ring_write(ring, header);
5168 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5169 	amdgpu_ring_write(ring,
5170 #ifdef __BIG_ENDIAN
5171 		(2 << 0) |
5172 #endif
5173 		lower_32_bits(ib->gpu_addr));
5174 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5175 	amdgpu_ring_write(ring, control);
5176 }
5177 
5178 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5179 					  struct amdgpu_job *job,
5180 					  struct amdgpu_ib *ib,
5181 					  uint32_t flags)
5182 {
5183 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5184 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5185 
5186 	/* Currently, there is a high possibility to get wave ID mismatch
5187 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5188 	 * different wave IDs than the GDS expects. This situation happens
5189 	 * randomly when at least 5 compute pipes use GDS ordered append.
5190 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5191 	 * Those are probably bugs somewhere else in the kernel driver.
5192 	 *
5193 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5194 	 * GDS to 0 for this ring (me/pipe).
5195 	 */
5196 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5197 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5198 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5199 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5200 	}
5201 
5202 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5203 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5204 	amdgpu_ring_write(ring,
5205 #ifdef __BIG_ENDIAN
5206 				(2 << 0) |
5207 #endif
5208 				lower_32_bits(ib->gpu_addr));
5209 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5210 	amdgpu_ring_write(ring, control);
5211 }
5212 
5213 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5214 				     u64 seq, unsigned flags)
5215 {
5216 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5217 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5218 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5219 
5220 	/* RELEASE_MEM - flush caches, send int */
5221 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5222 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5223 					       EOP_TC_NC_ACTION_EN) :
5224 					      (EOP_TCL1_ACTION_EN |
5225 					       EOP_TC_ACTION_EN |
5226 					       EOP_TC_WB_ACTION_EN |
5227 					       EOP_TC_MD_ACTION_EN)) |
5228 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5229 				 EVENT_INDEX(5)));
5230 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5231 
5232 	/*
5233 	 * the address should be Qword aligned if 64bit write, Dword
5234 	 * aligned if only send 32bit data low (discard data high)
5235 	 */
5236 	if (write64bit)
5237 		BUG_ON(addr & 0x7);
5238 	else
5239 		BUG_ON(addr & 0x3);
5240 	amdgpu_ring_write(ring, lower_32_bits(addr));
5241 	amdgpu_ring_write(ring, upper_32_bits(addr));
5242 	amdgpu_ring_write(ring, lower_32_bits(seq));
5243 	amdgpu_ring_write(ring, upper_32_bits(seq));
5244 	amdgpu_ring_write(ring, 0);
5245 }
5246 
5247 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5248 {
5249 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5250 	uint32_t seq = ring->fence_drv.sync_seq;
5251 	uint64_t addr = ring->fence_drv.gpu_addr;
5252 
5253 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5254 			      lower_32_bits(addr), upper_32_bits(addr),
5255 			      seq, 0xffffffff, 4);
5256 }
5257 
5258 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5259 					unsigned vmid, uint64_t pd_addr)
5260 {
5261 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5262 
5263 	/* compute doesn't have PFP */
5264 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5265 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5266 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5267 		amdgpu_ring_write(ring, 0x0);
5268 	}
5269 }
5270 
5271 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5272 {
5273 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5274 }
5275 
5276 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5277 {
5278 	u64 wptr;
5279 
5280 	/* XXX check if swapping is necessary on BE */
5281 	if (ring->use_doorbell)
5282 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5283 	else
5284 		BUG();
5285 	return wptr;
5286 }
5287 
5288 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5289 {
5290 	struct amdgpu_device *adev = ring->adev;
5291 
5292 	/* XXX check if swapping is necessary on BE */
5293 	if (ring->use_doorbell) {
5294 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5295 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5296 	} else{
5297 		BUG(); /* only DOORBELL method supported on gfx9 now */
5298 	}
5299 }
5300 
5301 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5302 					 u64 seq, unsigned int flags)
5303 {
5304 	struct amdgpu_device *adev = ring->adev;
5305 
5306 	/* we only allocate 32bit for each seq wb address */
5307 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5308 
5309 	/* write fence seq to the "addr" */
5310 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5311 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5312 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5313 	amdgpu_ring_write(ring, lower_32_bits(addr));
5314 	amdgpu_ring_write(ring, upper_32_bits(addr));
5315 	amdgpu_ring_write(ring, lower_32_bits(seq));
5316 
5317 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5318 		/* set register to trigger INT */
5319 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5320 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5321 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5322 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5323 		amdgpu_ring_write(ring, 0);
5324 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5325 	}
5326 }
5327 
5328 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5329 {
5330 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5331 	amdgpu_ring_write(ring, 0);
5332 }
5333 
5334 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5335 {
5336 	struct v9_ce_ib_state ce_payload = {0};
5337 	uint64_t csa_addr;
5338 	int cnt;
5339 
5340 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5341 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5342 
5343 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5344 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5345 				 WRITE_DATA_DST_SEL(8) |
5346 				 WR_CONFIRM) |
5347 				 WRITE_DATA_CACHE_POLICY(0));
5348 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5349 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5350 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5351 }
5352 
5353 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5354 {
5355 	struct v9_de_ib_state de_payload = {0};
5356 	uint64_t csa_addr, gds_addr;
5357 	int cnt;
5358 
5359 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5360 	gds_addr = csa_addr + 4096;
5361 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5362 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5363 
5364 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5365 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5366 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5367 				 WRITE_DATA_DST_SEL(8) |
5368 				 WR_CONFIRM) |
5369 				 WRITE_DATA_CACHE_POLICY(0));
5370 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5371 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5372 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5373 }
5374 
5375 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5376 				   bool secure)
5377 {
5378 	uint32_t v = secure ? FRAME_TMZ : 0;
5379 
5380 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5381 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5382 }
5383 
5384 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5385 {
5386 	uint32_t dw2 = 0;
5387 
5388 	if (amdgpu_sriov_vf(ring->adev))
5389 		gfx_v9_0_ring_emit_ce_meta(ring);
5390 
5391 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5392 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5393 		/* set load_global_config & load_global_uconfig */
5394 		dw2 |= 0x8001;
5395 		/* set load_cs_sh_regs */
5396 		dw2 |= 0x01000000;
5397 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5398 		dw2 |= 0x10002;
5399 
5400 		/* set load_ce_ram if preamble presented */
5401 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5402 			dw2 |= 0x10000000;
5403 	} else {
5404 		/* still load_ce_ram if this is the first time preamble presented
5405 		 * although there is no context switch happens.
5406 		 */
5407 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5408 			dw2 |= 0x10000000;
5409 	}
5410 
5411 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5412 	amdgpu_ring_write(ring, dw2);
5413 	amdgpu_ring_write(ring, 0);
5414 }
5415 
5416 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5417 {
5418 	unsigned ret;
5419 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5420 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5421 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5422 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5423 	ret = ring->wptr & ring->buf_mask;
5424 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5425 	return ret;
5426 }
5427 
5428 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5429 {
5430 	unsigned cur;
5431 	BUG_ON(offset > ring->buf_mask);
5432 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5433 
5434 	cur = (ring->wptr - 1) & ring->buf_mask;
5435 	if (likely(cur > offset))
5436 		ring->ring[offset] = cur - offset;
5437 	else
5438 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5439 }
5440 
5441 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5442 				    uint32_t reg_val_offs)
5443 {
5444 	struct amdgpu_device *adev = ring->adev;
5445 
5446 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5447 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5448 				(5 << 8) |	/* dst: memory */
5449 				(1 << 20));	/* write confirm */
5450 	amdgpu_ring_write(ring, reg);
5451 	amdgpu_ring_write(ring, 0);
5452 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5453 				reg_val_offs * 4));
5454 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5455 				reg_val_offs * 4));
5456 }
5457 
5458 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5459 				    uint32_t val)
5460 {
5461 	uint32_t cmd = 0;
5462 
5463 	switch (ring->funcs->type) {
5464 	case AMDGPU_RING_TYPE_GFX:
5465 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5466 		break;
5467 	case AMDGPU_RING_TYPE_KIQ:
5468 		cmd = (1 << 16); /* no inc addr */
5469 		break;
5470 	default:
5471 		cmd = WR_CONFIRM;
5472 		break;
5473 	}
5474 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5475 	amdgpu_ring_write(ring, cmd);
5476 	amdgpu_ring_write(ring, reg);
5477 	amdgpu_ring_write(ring, 0);
5478 	amdgpu_ring_write(ring, val);
5479 }
5480 
5481 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5482 					uint32_t val, uint32_t mask)
5483 {
5484 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5485 }
5486 
5487 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5488 						  uint32_t reg0, uint32_t reg1,
5489 						  uint32_t ref, uint32_t mask)
5490 {
5491 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5492 	struct amdgpu_device *adev = ring->adev;
5493 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5494 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5495 
5496 	if (fw_version_ok)
5497 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5498 				      ref, mask, 0x20);
5499 	else
5500 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5501 							   ref, mask);
5502 }
5503 
5504 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5505 {
5506 	struct amdgpu_device *adev = ring->adev;
5507 	uint32_t value = 0;
5508 
5509 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5510 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5511 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5512 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5513 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5514 }
5515 
5516 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5517 						 enum amdgpu_interrupt_state state)
5518 {
5519 	switch (state) {
5520 	case AMDGPU_IRQ_STATE_DISABLE:
5521 	case AMDGPU_IRQ_STATE_ENABLE:
5522 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5523 			       TIME_STAMP_INT_ENABLE,
5524 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5525 		break;
5526 	default:
5527 		break;
5528 	}
5529 }
5530 
5531 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5532 						     int me, int pipe,
5533 						     enum amdgpu_interrupt_state state)
5534 {
5535 	u32 mec_int_cntl, mec_int_cntl_reg;
5536 
5537 	/*
5538 	 * amdgpu controls only the first MEC. That's why this function only
5539 	 * handles the setting of interrupts for this specific MEC. All other
5540 	 * pipes' interrupts are set by amdkfd.
5541 	 */
5542 
5543 	if (me == 1) {
5544 		switch (pipe) {
5545 		case 0:
5546 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5547 			break;
5548 		case 1:
5549 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5550 			break;
5551 		case 2:
5552 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5553 			break;
5554 		case 3:
5555 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5556 			break;
5557 		default:
5558 			DRM_DEBUG("invalid pipe %d\n", pipe);
5559 			return;
5560 		}
5561 	} else {
5562 		DRM_DEBUG("invalid me %d\n", me);
5563 		return;
5564 	}
5565 
5566 	switch (state) {
5567 	case AMDGPU_IRQ_STATE_DISABLE:
5568 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
5569 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5570 					     TIME_STAMP_INT_ENABLE, 0);
5571 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5572 		break;
5573 	case AMDGPU_IRQ_STATE_ENABLE:
5574 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5575 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5576 					     TIME_STAMP_INT_ENABLE, 1);
5577 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5578 		break;
5579 	default:
5580 		break;
5581 	}
5582 }
5583 
5584 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5585 					     struct amdgpu_irq_src *source,
5586 					     unsigned type,
5587 					     enum amdgpu_interrupt_state state)
5588 {
5589 	switch (state) {
5590 	case AMDGPU_IRQ_STATE_DISABLE:
5591 	case AMDGPU_IRQ_STATE_ENABLE:
5592 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5593 			       PRIV_REG_INT_ENABLE,
5594 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5595 		break;
5596 	default:
5597 		break;
5598 	}
5599 
5600 	return 0;
5601 }
5602 
5603 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5604 					      struct amdgpu_irq_src *source,
5605 					      unsigned type,
5606 					      enum amdgpu_interrupt_state state)
5607 {
5608 	switch (state) {
5609 	case AMDGPU_IRQ_STATE_DISABLE:
5610 	case AMDGPU_IRQ_STATE_ENABLE:
5611 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5612 			       PRIV_INSTR_INT_ENABLE,
5613 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5614 		break;
5615 	default:
5616 		break;
5617 	}
5618 
5619 	return 0;
5620 }
5621 
/*
 * Write @val to CP_ECC_ERROR_INT_ENABLE in CP_ME<me>_PIPE<pipe>_INT_CNTL.
 * @me and @pipe are pasted into the register name, so they must be literal
 * tokens.  An `adev` variable must be in scope at the point of use.
 */
#define SET_ECC_ON_ME_PIPE(me, pipe, val)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, val)

#define ENABLE_ECC_ON_ME_PIPE(me, pipe)	SET_ECC_ON_ME_PIPE(me, pipe, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)	SET_ECC_ON_ME_PIPE(me, pipe, 0)
5629 
5630 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5631 					      struct amdgpu_irq_src *source,
5632 					      unsigned type,
5633 					      enum amdgpu_interrupt_state state)
5634 {
5635 	switch (state) {
5636 	case AMDGPU_IRQ_STATE_DISABLE:
5637 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5638 				CP_ECC_ERROR_INT_ENABLE, 0);
5639 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5640 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5641 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5642 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5643 		break;
5644 
5645 	case AMDGPU_IRQ_STATE_ENABLE:
5646 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5647 				CP_ECC_ERROR_INT_ENABLE, 1);
5648 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5649 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5650 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5651 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5652 		break;
5653 	default:
5654 		break;
5655 	}
5656 
5657 	return 0;
5658 }
5659 
5660 
5661 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5662 					    struct amdgpu_irq_src *src,
5663 					    unsigned type,
5664 					    enum amdgpu_interrupt_state state)
5665 {
5666 	switch (type) {
5667 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5668 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5669 		break;
5670 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5671 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5672 		break;
5673 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5674 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5675 		break;
5676 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5677 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5678 		break;
5679 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5680 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5681 		break;
5682 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5683 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5684 		break;
5685 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5686 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5687 		break;
5688 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5689 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5690 		break;
5691 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5692 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5693 		break;
5694 	default:
5695 		break;
5696 	}
5697 	return 0;
5698 }
5699 
5700 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5701 			    struct amdgpu_irq_src *source,
5702 			    struct amdgpu_iv_entry *entry)
5703 {
5704 	int i;
5705 	u8 me_id, pipe_id, queue_id;
5706 	struct amdgpu_ring *ring;
5707 
5708 	DRM_DEBUG("IH: CP EOP\n");
5709 	me_id = (entry->ring_id & 0x0c) >> 2;
5710 	pipe_id = (entry->ring_id & 0x03) >> 0;
5711 	queue_id = (entry->ring_id & 0x70) >> 4;
5712 
5713 	switch (me_id) {
5714 	case 0:
5715 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5716 		break;
5717 	case 1:
5718 	case 2:
5719 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5720 			ring = &adev->gfx.compute_ring[i];
5721 			/* Per-queue interrupt is supported for MEC starting from VI.
5722 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5723 			  */
5724 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5725 				amdgpu_fence_process(ring);
5726 		}
5727 		break;
5728 	}
5729 	return 0;
5730 }
5731 
5732 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5733 			   struct amdgpu_iv_entry *entry)
5734 {
5735 	u8 me_id, pipe_id, queue_id;
5736 	struct amdgpu_ring *ring;
5737 	int i;
5738 
5739 	me_id = (entry->ring_id & 0x0c) >> 2;
5740 	pipe_id = (entry->ring_id & 0x03) >> 0;
5741 	queue_id = (entry->ring_id & 0x70) >> 4;
5742 
5743 	switch (me_id) {
5744 	case 0:
5745 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5746 		break;
5747 	case 1:
5748 	case 2:
5749 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5750 			ring = &adev->gfx.compute_ring[i];
5751 			if (ring->me == me_id && ring->pipe == pipe_id &&
5752 			    ring->queue == queue_id)
5753 				drm_sched_fault(&ring->sched);
5754 		}
5755 		break;
5756 	}
5757 }
5758 
/*
 * Privileged register fault handler: log the error and report a scheduler
 * fault on the ring named by @entry.  @source is not used.  Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5767 
/*
 * Interrupt handler for an illegal instruction in the command stream:
 * log the error, then flag a fault on the ring identified by @entry.
 * Always returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	/* Hand the offending ring over to the scheduler fault path. */
	gfx_v9_0_fault(adev, entry);

	return 0;
}
5776 
5777 
/*
 * Decode table for the GFX EDC counter registers.
 *
 * Each entry gives, in order: the sub-block name used in log messages, the
 * counter register the sub-block is reported in, the register field holding
 * its SEC count, and the register field holding its DED count.
 *
 * gfx_v9_0_ras_error_count() walks this table for every counter register
 * value it is given, matching entries on reg_offset/seg/inst and extracting
 * the counts with the entry's sec_count_* and ded_count_* mask/shift values.
 *
 * Entries whose last two initializers are "0, 0" supply no DED field, so no
 * DED count is ever reported for them (this assumes SOC15_REG_FIELD expands
 * to a mask/shift pair -- confirm against its definition).
 */
static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
	},
	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
	},
	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
	  0, 0
	},
	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
	  0, 0
	},
	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
	},
	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
	  0, 0
	},
	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
	},
	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
	},
	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
	  0, 0
	},
	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
	},
	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
	  0, 0
	},
	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
	  0, 0
	},
	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
	},
	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
	  0, 0
	},
	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
	},
	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
	  0, 0
	},
	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
	},
	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
	},
	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
	},
	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
	},
	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
	},
	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
	  0, 0
	},
	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
	  0, 0
	},
	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
	  0, 0
	},
	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
	  0, 0
	},
	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
	},
	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
	},
	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
	  0, 0
	},
	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
	},
	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
	},
	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
	},
	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
	},
	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
	},
	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
	},
	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
	},
	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
	},
	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
	},
	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
	},
	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
	},
	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
	},
	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
	},
	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
	},
	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
	},
	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
	  0, 0
	}
};
6218 
6219 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6220 				     void *inject_if)
6221 {
6222 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6223 	int ret;
6224 	struct ta_ras_trigger_error_input block_info = { 0 };
6225 
6226 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6227 		return -EINVAL;
6228 
6229 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6230 		return -EINVAL;
6231 
6232 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6233 		return -EPERM;
6234 
6235 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6236 	      info->head.type)) {
6237 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6238 			ras_gfx_subblocks[info->head.sub_block_index].name,
6239 			info->head.type);
6240 		return -EPERM;
6241 	}
6242 
6243 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6244 	      info->head.type)) {
6245 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6246 			ras_gfx_subblocks[info->head.sub_block_index].name,
6247 			info->head.type);
6248 		return -EPERM;
6249 	}
6250 
6251 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6252 	block_info.sub_block_index =
6253 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6254 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6255 	block_info.address = info->address;
6256 	block_info.value = info->value;
6257 
6258 	mutex_lock(&adev->grbm_idx_mutex);
6259 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6260 	mutex_unlock(&adev->grbm_idx_mutex);
6261 
6262 	return ret;
6263 }
6264 
/*
 * Names of the VM L2 bank cache memories, in the order of the index values
 * written to mmVM_L2_MEM_ECC_INDEX when their ECC counters are read.
 * Both the strings and the pointer array are read-only.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6283 
/*
 * Names of the VM L2 walker memories, in the order of the index values
 * written to mmVM_L2_WALKER_MEM_ECC_INDEX when their ECC counters are read.
 * Both the strings and the pointer array are read-only.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6293 
/*
 * Names of the ATC L2 2M cache memories, in the order of the index values
 * written to mmATC_L2_CACHE_2M_EDC_INDEX when their EDC counters are read.
 * Both the strings and the pointer array are read-only.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6300 
/*
 * Names of the ATC L2 4K cache memories, in the order of the index values
 * written to mmATC_L2_CACHE_4K_EDC_INDEX when their EDC counters are read.
 * Both the strings and the pointer array are read-only.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6335 
6336 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6337 					 struct ras_err_data *err_data)
6338 {
6339 	uint32_t i, data;
6340 	uint32_t sec_count, ded_count;
6341 
6342 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6343 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6344 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6345 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6346 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6347 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6348 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6349 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6350 
6351 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6352 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6353 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6354 
6355 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6356 		if (sec_count) {
6357 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6358 				"SEC %d\n", i, vml2_mems[i], sec_count);
6359 			err_data->ce_count += sec_count;
6360 		}
6361 
6362 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6363 		if (ded_count) {
6364 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6365 				"DED %d\n", i, vml2_mems[i], ded_count);
6366 			err_data->ue_count += ded_count;
6367 		}
6368 	}
6369 
6370 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6371 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6372 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6373 
6374 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6375 						SEC_COUNT);
6376 		if (sec_count) {
6377 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6378 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6379 			err_data->ce_count += sec_count;
6380 		}
6381 
6382 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6383 						DED_COUNT);
6384 		if (ded_count) {
6385 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6386 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6387 			err_data->ue_count += ded_count;
6388 		}
6389 	}
6390 
6391 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6392 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6393 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6394 
6395 		sec_count = (data & 0x00006000L) >> 0xd;
6396 		if (sec_count) {
6397 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6398 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6399 				sec_count);
6400 			err_data->ce_count += sec_count;
6401 		}
6402 	}
6403 
6404 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6405 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6406 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6407 
6408 		sec_count = (data & 0x00006000L) >> 0xd;
6409 		if (sec_count) {
6410 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6411 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6412 				sec_count);
6413 			err_data->ce_count += sec_count;
6414 		}
6415 
6416 		ded_count = (data & 0x00018000L) >> 0xf;
6417 		if (ded_count) {
6418 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6419 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6420 				ded_count);
6421 			err_data->ue_count += ded_count;
6422 		}
6423 	}
6424 
6425 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6426 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6427 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6428 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6429 
6430 	return 0;
6431 }
6432 
6433 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6434 	const struct soc15_reg_entry *reg,
6435 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6436 	uint32_t *sec_count, uint32_t *ded_count)
6437 {
6438 	uint32_t i;
6439 	uint32_t sec_cnt, ded_cnt;
6440 
6441 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6442 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6443 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6444 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6445 			continue;
6446 
6447 		sec_cnt = (value &
6448 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6449 				gfx_v9_0_ras_fields[i].sec_count_shift;
6450 		if (sec_cnt) {
6451 			dev_info(adev->dev, "GFX SubBlock %s, "
6452 				"Instance[%d][%d], SEC %d\n",
6453 				gfx_v9_0_ras_fields[i].name,
6454 				se_id, inst_id,
6455 				sec_cnt);
6456 			*sec_count += sec_cnt;
6457 		}
6458 
6459 		ded_cnt = (value &
6460 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6461 				gfx_v9_0_ras_fields[i].ded_count_shift;
6462 		if (ded_cnt) {
6463 			dev_info(adev->dev, "GFX SubBlock %s, "
6464 				"Instance[%d][%d], DED %d\n",
6465 				gfx_v9_0_ras_fields[i].name,
6466 				se_id, inst_id,
6467 				ded_cnt);
6468 			*ded_count += ded_cnt;
6469 		}
6470 	}
6471 
6472 	return 0;
6473 }
6474 
6475 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6476 {
6477 	int i, j, k;
6478 
6479 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6480 		return;
6481 
6482 	/* read back registers to clear the counters */
6483 	mutex_lock(&adev->grbm_idx_mutex);
6484 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6485 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6486 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6487 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
6488 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6489 			}
6490 		}
6491 	}
6492 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6493 	mutex_unlock(&adev->grbm_idx_mutex);
6494 
6495 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6496 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6497 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6498 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6499 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6500 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6501 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6502 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6503 
6504 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6505 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6506 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6507 	}
6508 
6509 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6510 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6511 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6512 	}
6513 
6514 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6515 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6516 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6517 	}
6518 
6519 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6520 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6521 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6522 	}
6523 
6524 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6525 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6526 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6527 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6528 }
6529 
6530 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6531 					  void *ras_error_status)
6532 {
6533 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6534 	uint32_t sec_count = 0, ded_count = 0;
6535 	uint32_t i, j, k;
6536 	uint32_t reg_value;
6537 
6538 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6539 		return;
6540 
6541 	err_data->ue_count = 0;
6542 	err_data->ce_count = 0;
6543 
6544 	mutex_lock(&adev->grbm_idx_mutex);
6545 
6546 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6547 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6548 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6549 				amdgpu_gfx_select_se_sh(adev, j, 0, k);
6550 				reg_value =
6551 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6552 				if (reg_value)
6553 					gfx_v9_0_ras_error_count(adev,
6554 						&gfx_v9_0_edc_counter_regs[i],
6555 						j, k, reg_value,
6556 						&sec_count, &ded_count);
6557 			}
6558 		}
6559 	}
6560 
6561 	err_data->ce_count += sec_count;
6562 	err_data->ue_count += ded_count;
6563 
6564 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6565 	mutex_unlock(&adev->grbm_idx_mutex);
6566 
6567 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6568 }
6569 
6570 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6571 {
6572 	const unsigned int cp_coher_cntl =
6573 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6574 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6575 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6576 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6577 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6578 
6579 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6580 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6581 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6582 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6583 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6584 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6585 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6586 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6587 }
6588 
6589 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6590 					uint32_t pipe, bool enable)
6591 {
6592 	struct amdgpu_device *adev = ring->adev;
6593 	uint32_t val;
6594 	uint32_t wcl_cs_reg;
6595 
6596 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6597 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6598 
6599 	switch (pipe) {
6600 	case 0:
6601 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6602 		break;
6603 	case 1:
6604 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6605 		break;
6606 	case 2:
6607 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6608 		break;
6609 	case 3:
6610 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6611 		break;
6612 	default:
6613 		DRM_DEBUG("invalid pipe %d\n", pipe);
6614 		return;
6615 	}
6616 
6617 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6618 
6619 }
6620 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6621 {
6622 	struct amdgpu_device *adev = ring->adev;
6623 	uint32_t val;
6624 	int i;
6625 
6626 
6627 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6628 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6629 	 * around 25% of gpu resources.
6630 	 */
6631 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6632 	amdgpu_ring_emit_wreg(ring,
6633 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6634 			      val);
6635 
6636 	/* Restrict waves for normal/low priority compute queues as well
6637 	 * to get best QoS for high priority compute jobs.
6638 	 *
6639 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6640 	 */
6641 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6642 		if (i != ring->pipe)
6643 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6644 
6645 	}
6646 }
6647 
6648 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6649 	.name = "gfx_v9_0",
6650 	.early_init = gfx_v9_0_early_init,
6651 	.late_init = gfx_v9_0_late_init,
6652 	.sw_init = gfx_v9_0_sw_init,
6653 	.sw_fini = gfx_v9_0_sw_fini,
6654 	.hw_init = gfx_v9_0_hw_init,
6655 	.hw_fini = gfx_v9_0_hw_fini,
6656 	.suspend = gfx_v9_0_suspend,
6657 	.resume = gfx_v9_0_resume,
6658 	.is_idle = gfx_v9_0_is_idle,
6659 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6660 	.soft_reset = gfx_v9_0_soft_reset,
6661 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6662 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6663 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6664 };
6665 
6666 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6667 	.type = AMDGPU_RING_TYPE_GFX,
6668 	.align_mask = 0xff,
6669 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6670 	.support_64bit_ptrs = true,
6671 	.secure_submission_supported = true,
6672 	.vmhub = AMDGPU_GFXHUB_0,
6673 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6674 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6675 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6676 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6677 		5 +  /* COND_EXEC */
6678 		7 +  /* PIPELINE_SYNC */
6679 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6680 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6681 		2 + /* VM_FLUSH */
6682 		8 +  /* FENCE for VM_FLUSH */
6683 		20 + /* GDS switch */
6684 		4 + /* double SWITCH_BUFFER,
6685 		       the first COND_EXEC jump to the place just
6686 			   prior to this double SWITCH_BUFFER  */
6687 		5 + /* COND_EXEC */
6688 		7 +	 /*	HDP_flush */
6689 		4 +	 /*	VGT_flush */
6690 		14 + /*	CE_META */
6691 		31 + /*	DE_META */
6692 		3 + /* CNTX_CTRL */
6693 		5 + /* HDP_INVL */
6694 		8 + 8 + /* FENCE x2 */
6695 		2 + /* SWITCH_BUFFER */
6696 		7, /* gfx_v9_0_emit_mem_sync */
6697 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6698 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6699 	.emit_fence = gfx_v9_0_ring_emit_fence,
6700 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6701 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6702 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6703 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6704 	.test_ring = gfx_v9_0_ring_test_ring,
6705 	.test_ib = gfx_v9_0_ring_test_ib,
6706 	.insert_nop = amdgpu_ring_insert_nop,
6707 	.pad_ib = amdgpu_ring_generic_pad_ib,
6708 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6709 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6710 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6711 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6712 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6713 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6714 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6715 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6716 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6717 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6718 };
6719 
6720 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6721 	.type = AMDGPU_RING_TYPE_COMPUTE,
6722 	.align_mask = 0xff,
6723 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6724 	.support_64bit_ptrs = true,
6725 	.vmhub = AMDGPU_GFXHUB_0,
6726 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6727 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6728 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6729 	.emit_frame_size =
6730 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6731 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6732 		5 + /* hdp invalidate */
6733 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6734 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6735 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6736 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6737 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6738 		7 + /* gfx_v9_0_emit_mem_sync */
6739 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6740 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6741 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6742 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6743 	.emit_fence = gfx_v9_0_ring_emit_fence,
6744 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6745 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6746 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6747 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6748 	.test_ring = gfx_v9_0_ring_test_ring,
6749 	.test_ib = gfx_v9_0_ring_test_ib,
6750 	.insert_nop = amdgpu_ring_insert_nop,
6751 	.pad_ib = amdgpu_ring_generic_pad_ib,
6752 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6753 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6754 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6755 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6756 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6757 };
6758 
6759 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6760 	.type = AMDGPU_RING_TYPE_KIQ,
6761 	.align_mask = 0xff,
6762 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6763 	.support_64bit_ptrs = true,
6764 	.vmhub = AMDGPU_GFXHUB_0,
6765 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6766 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6767 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6768 	.emit_frame_size =
6769 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6770 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6771 		5 + /* hdp invalidate */
6772 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6773 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6774 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6775 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6776 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6777 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6778 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6779 	.test_ring = gfx_v9_0_ring_test_ring,
6780 	.insert_nop = amdgpu_ring_insert_nop,
6781 	.pad_ib = amdgpu_ring_generic_pad_ib,
6782 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6783 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6784 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6785 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6786 };
6787 
6788 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6789 {
6790 	int i;
6791 
6792 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6793 
6794 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6795 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6796 
6797 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6798 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6799 }
6800 
6801 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6802 	.set = gfx_v9_0_set_eop_interrupt_state,
6803 	.process = gfx_v9_0_eop_irq,
6804 };
6805 
6806 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6807 	.set = gfx_v9_0_set_priv_reg_fault_state,
6808 	.process = gfx_v9_0_priv_reg_irq,
6809 };
6810 
6811 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6812 	.set = gfx_v9_0_set_priv_inst_fault_state,
6813 	.process = gfx_v9_0_priv_inst_irq,
6814 };
6815 
6816 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6817 	.set = gfx_v9_0_set_cp_ecc_error_state,
6818 	.process = amdgpu_gfx_cp_ecc_error_irq,
6819 };
6820 
6821 
6822 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6823 {
6824 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6825 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6826 
6827 	adev->gfx.priv_reg_irq.num_types = 1;
6828 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6829 
6830 	adev->gfx.priv_inst_irq.num_types = 1;
6831 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6832 
6833 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6834 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6835 }
6836 
6837 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6838 {
6839 	switch (adev->ip_versions[GC_HWIP][0]) {
6840 	case IP_VERSION(9, 0, 1):
6841 	case IP_VERSION(9, 2, 1):
6842 	case IP_VERSION(9, 4, 0):
6843 	case IP_VERSION(9, 2, 2):
6844 	case IP_VERSION(9, 1, 0):
6845 	case IP_VERSION(9, 4, 1):
6846 	case IP_VERSION(9, 3, 0):
6847 	case IP_VERSION(9, 4, 2):
6848 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6849 		break;
6850 	default:
6851 		break;
6852 	}
6853 }
6854 
6855 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6856 {
6857 	/* init asci gds info */
6858 	switch (adev->ip_versions[GC_HWIP][0]) {
6859 	case IP_VERSION(9, 0, 1):
6860 	case IP_VERSION(9, 2, 1):
6861 	case IP_VERSION(9, 4, 0):
6862 		adev->gds.gds_size = 0x10000;
6863 		break;
6864 	case IP_VERSION(9, 2, 2):
6865 	case IP_VERSION(9, 1, 0):
6866 	case IP_VERSION(9, 4, 1):
6867 		adev->gds.gds_size = 0x1000;
6868 		break;
6869 	case IP_VERSION(9, 4, 2):
6870 		/* aldebaran removed all the GDS internal memory,
6871 		 * only support GWS opcode in kernel, like barrier
6872 		 * semaphore.etc */
6873 		adev->gds.gds_size = 0;
6874 		break;
6875 	default:
6876 		adev->gds.gds_size = 0x10000;
6877 		break;
6878 	}
6879 
6880 	switch (adev->ip_versions[GC_HWIP][0]) {
6881 	case IP_VERSION(9, 0, 1):
6882 	case IP_VERSION(9, 4, 0):
6883 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6884 		break;
6885 	case IP_VERSION(9, 2, 1):
6886 		adev->gds.gds_compute_max_wave_id = 0x27f;
6887 		break;
6888 	case IP_VERSION(9, 2, 2):
6889 	case IP_VERSION(9, 1, 0):
6890 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
6891 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6892 		else
6893 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6894 		break;
6895 	case IP_VERSION(9, 4, 1):
6896 		adev->gds.gds_compute_max_wave_id = 0xfff;
6897 		break;
6898 	case IP_VERSION(9, 4, 2):
6899 		/* deprecated for Aldebaran, no usage at all */
6900 		adev->gds.gds_compute_max_wave_id = 0;
6901 		break;
6902 	default:
6903 		/* this really depends on the chip */
6904 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6905 		break;
6906 	}
6907 
6908 	adev->gds.gws_size = 64;
6909 	adev->gds.oa_size = 16;
6910 }
6911 
6912 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6913 						 u32 bitmap)
6914 {
6915 	u32 data;
6916 
6917 	if (!bitmap)
6918 		return;
6919 
6920 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6921 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6922 
6923 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6924 }
6925 
6926 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6927 {
6928 	u32 data, mask;
6929 
6930 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6931 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6932 
6933 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6934 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6935 
6936 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6937 
6938 	return (~data) & mask;
6939 }
6940 
6941 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6942 				 struct amdgpu_cu_info *cu_info)
6943 {
6944 	int i, j, k, counter, active_cu_number = 0;
6945 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6946 	unsigned disable_masks[4 * 4];
6947 
6948 	if (!adev || !cu_info)
6949 		return -EINVAL;
6950 
6951 	/*
6952 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6953 	 */
6954 	if (adev->gfx.config.max_shader_engines *
6955 		adev->gfx.config.max_sh_per_se > 16)
6956 		return -EINVAL;
6957 
6958 	amdgpu_gfx_parse_disable_cu(disable_masks,
6959 				    adev->gfx.config.max_shader_engines,
6960 				    adev->gfx.config.max_sh_per_se);
6961 
6962 	mutex_lock(&adev->grbm_idx_mutex);
6963 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6964 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6965 			mask = 1;
6966 			ao_bitmap = 0;
6967 			counter = 0;
6968 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
6969 			gfx_v9_0_set_user_cu_inactive_bitmap(
6970 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6971 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6972 
6973 			/*
6974 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6975 			 * 4x4 size array, and it's usually suitable for Vega
6976 			 * ASICs which has 4*2 SE/SH layout.
6977 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6978 			 * To mostly reduce the impact, we make it compatible
6979 			 * with current bitmap array as below:
6980 			 *    SE4,SH0 --> bitmap[0][1]
6981 			 *    SE5,SH0 --> bitmap[1][1]
6982 			 *    SE6,SH0 --> bitmap[2][1]
6983 			 *    SE7,SH0 --> bitmap[3][1]
6984 			 */
6985 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6986 
6987 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6988 				if (bitmap & mask) {
6989 					if (counter < adev->gfx.config.max_cu_per_sh)
6990 						ao_bitmap |= mask;
6991 					counter ++;
6992 				}
6993 				mask <<= 1;
6994 			}
6995 			active_cu_number += counter;
6996 			if (i < 2 && j < 2)
6997 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6998 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6999 		}
7000 	}
7001 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7002 	mutex_unlock(&adev->grbm_idx_mutex);
7003 
7004 	cu_info->number = active_cu_number;
7005 	cu_info->ao_cu_mask = ao_cu_mask;
7006 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7007 
7008 	return 0;
7009 }
7010 
7011 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7012 {
7013 	.type = AMD_IP_BLOCK_TYPE_GFX,
7014 	.major = 9,
7015 	.minor = 0,
7016 	.rev = 0,
7017 	.funcs = &gfx_v9_0_ip_funcs,
7018 };
7019