xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision c4a11bf4)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 enum ta_ras_gfx_subblock {
144 	/*CPC*/
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
146 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
147 	TA_RAS_BLOCK__GFX_CPC_UCODE,
148 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
149 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
150 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
151 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
152 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
153 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
155 	/* CPF*/
156 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
157 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
158 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
159 	TA_RAS_BLOCK__GFX_CPF_TAG,
160 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
161 	/* CPG*/
162 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
164 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
165 	TA_RAS_BLOCK__GFX_CPG_TAG,
166 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
167 	/* GDS*/
168 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
169 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
170 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
171 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
172 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
173 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
174 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
175 	/* SPI*/
176 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
177 	/* SQ*/
178 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
180 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
181 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
182 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
184 	/* SQC (3 ranges)*/
185 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
186 	/* SQC range 0*/
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
189 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
190 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
191 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
192 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
193 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
194 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
197 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
198 	/* SQC range 1*/
199 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
200 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
201 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
202 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
203 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
204 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
205 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
206 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
207 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
209 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
211 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
212 	/* SQC range 2*/
213 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
214 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
215 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
216 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
218 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
220 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
221 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
223 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
225 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
227 	/* TA*/
228 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
231 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
232 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
233 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
234 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
235 	/* TCA*/
236 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
239 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
240 	/* TCC (5 sub-ranges)*/
241 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
242 	/* TCC range 0*/
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
244 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
245 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
246 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
247 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
248 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
250 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
251 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
253 	/* TCC range 1*/
254 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
255 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
256 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
258 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
259 	/* TCC range 2*/
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
261 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
262 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
263 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
264 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
265 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
266 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
267 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
268 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
270 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
271 	/* TCC range 3*/
272 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
273 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
274 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
276 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
277 	/* TCC range 4*/
278 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
279 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
280 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
281 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
282 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
283 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
285 	/* TCI*/
286 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
287 	/* TCP*/
288 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
289 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
291 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
292 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
293 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
294 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
295 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
296 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
297 	/* TD*/
298 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
300 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
301 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
302 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
303 	/* EA (3 sub-ranges)*/
304 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
305 	/* EA range 0*/
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
307 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
308 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
309 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
310 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
311 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
312 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
316 	/* EA range 1*/
317 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
318 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
319 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
320 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
324 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
326 	/* EA range 2*/
327 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
328 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
329 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
330 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
331 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
333 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
334 	/* UTC VM L2 bank*/
335 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
336 	/* UTC VM walker*/
337 	TA_RAS_BLOCK__UTC_VML2_WALKER,
338 	/* UTC ATC L2 2MB cache*/
339 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
340 	/* UTC ATC L2 4KB cache*/
341 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
342 	TA_RAS_BLOCK__GFX_MAX
343 };
344 
345 struct ras_gfx_subblock {
346 	unsigned char *name;
347 	int ta_subblock;
348 	int hw_supported_error_type;
349 	int sw_supported_error_type;
350 };
351 
352 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
353 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
354 		#subblock,                                                     \
355 		TA_RAS_BLOCK__##subblock,                                      \
356 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
357 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
358 	}
359 
360 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
378 			     0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
380 			     0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
389 			     0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391 			     0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
393 			     0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
395 			     0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
397 			     0, 0),
398 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
399 			     0),
400 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
401 			     1),
402 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
403 			     0, 0, 0),
404 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405 			     0),
406 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
407 			     0),
408 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
409 			     0),
410 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
411 			     0),
412 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
413 			     0),
414 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
415 			     0, 0),
416 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417 			     0),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
419 			     0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
421 			     0, 0, 0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 			     0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
425 			     0),
426 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
427 			     0),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
429 			     0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
431 			     0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
433 			     0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
435 			     0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
445 			     1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
447 			     1),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
449 			     1),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
451 			     0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
453 			     0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
466 			     0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
469 			     0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
471 			     0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
473 			     0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
508 };
509 
510 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
511 {
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
532 };
533 
534 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
535 {
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
554 };
555 
556 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
557 {
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
569 };
570 
571 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
572 {
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
597 };
598 
599 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
600 {
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
608 };
609 
610 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
611 {
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
631 };
632 
633 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
634 {
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
654 };
655 
656 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
657 {
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
674 };
675 
676 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
677 {
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
691 };
692 
693 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
694 {
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
706 };
707 
708 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
709 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
710 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
711 };
712 
713 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
714 {
715 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
716 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
717 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
718 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
719 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
720 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 };
724 
725 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
726 {
727 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
728 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
729 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
730 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
731 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
732 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 };
736 
737 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
738 {
739 	static void *scratch_reg0;
740 	static void *scratch_reg1;
741 	static void *scratch_reg2;
742 	static void *scratch_reg3;
743 	static void *spare_int;
744 	static uint32_t grbm_cntl;
745 	static uint32_t grbm_idx;
746 
747 	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
748 	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
749 	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
750 	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
751 	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
752 
753 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
754 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
755 
756 	if (amdgpu_sriov_runtime(adev)) {
757 		pr_err("shouldn't call rlcg write register during runtime\n");
758 		return;
759 	}
760 
761 	if (offset == grbm_cntl || offset == grbm_idx) {
762 		if (offset  == grbm_cntl)
763 			writel(v, scratch_reg2);
764 		else if (offset == grbm_idx)
765 			writel(v, scratch_reg3);
766 
767 		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
768 	} else {
769 		uint32_t i = 0;
770 		uint32_t retries = 50000;
771 
772 		writel(v, scratch_reg0);
773 		writel(offset | 0x80000000, scratch_reg1);
774 		writel(1, spare_int);
775 		for (i = 0; i < retries; i++) {
776 			u32 tmp;
777 
778 			tmp = readl(scratch_reg1);
779 			if (!(tmp & 0x80000000))
780 				break;
781 
782 			udelay(10);
783 		}
784 		if (i >= retries)
785 			pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
786 	}
787 
788 }
789 
790 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
791 			       u32 v, u32 acc_flags, u32 hwip)
792 {
793 	if ((acc_flags & AMDGPU_REGS_RLC) &&
794 	    amdgpu_sriov_fullaccess(adev)) {
795 		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
796 
797 		return;
798 	}
799 
800 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
801 		WREG32_NO_KIQ(offset, v);
802 	else
803 		WREG32(offset, v);
804 }
805 
806 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
807 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
808 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
809 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
810 
811 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
812 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
813 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
814 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
815 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
816 				struct amdgpu_cu_info *cu_info);
817 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
818 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
819 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
820 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
821 					  void *ras_error_status);
822 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
823 				     void *inject_if);
824 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
825 
826 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
827 				uint64_t queue_mask)
828 {
829 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
830 	amdgpu_ring_write(kiq_ring,
831 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
832 		/* vmid_mask:0* queue_type:0 (KIQ) */
833 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
834 	amdgpu_ring_write(kiq_ring,
835 			lower_32_bits(queue_mask));	/* queue mask lo */
836 	amdgpu_ring_write(kiq_ring,
837 			upper_32_bits(queue_mask));	/* queue mask hi */
838 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
839 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
840 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
841 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
842 }
843 
844 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
845 				 struct amdgpu_ring *ring)
846 {
847 	struct amdgpu_device *adev = kiq_ring->adev;
848 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
849 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
850 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
851 
852 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
853 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
854 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
855 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
856 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
857 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
858 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
859 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
860 			 /*queue_type: normal compute queue */
861 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
862 			 /* alloc format: all_on_one_pipe */
863 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
864 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
865 			 /* num_queues: must be 1 */
866 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
867 	amdgpu_ring_write(kiq_ring,
868 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
869 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
870 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
871 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
872 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
873 }
874 
875 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
876 				   struct amdgpu_ring *ring,
877 				   enum amdgpu_unmap_queues_action action,
878 				   u64 gpu_addr, u64 seq)
879 {
880 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
881 
882 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
883 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
884 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
885 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
886 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
887 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
888 	amdgpu_ring_write(kiq_ring,
889 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
890 
891 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
892 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
893 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
894 		amdgpu_ring_write(kiq_ring, seq);
895 	} else {
896 		amdgpu_ring_write(kiq_ring, 0);
897 		amdgpu_ring_write(kiq_ring, 0);
898 		amdgpu_ring_write(kiq_ring, 0);
899 	}
900 }
901 
902 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
903 				   struct amdgpu_ring *ring,
904 				   u64 addr,
905 				   u64 seq)
906 {
907 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
910 	amdgpu_ring_write(kiq_ring,
911 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
912 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
913 			  PACKET3_QUERY_STATUS_COMMAND(2));
914 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
915 	amdgpu_ring_write(kiq_ring,
916 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
917 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
920 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
921 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
922 }
923 
924 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
925 				uint16_t pasid, uint32_t flush_type,
926 				bool all_hub)
927 {
928 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
929 	amdgpu_ring_write(kiq_ring,
930 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
931 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
932 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
933 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
934 }
935 
936 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
937 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
938 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
939 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
940 	.kiq_query_status = gfx_v9_0_kiq_query_status,
941 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
942 	.set_resources_size = 8,
943 	.map_queues_size = 7,
944 	.unmap_queues_size = 6,
945 	.query_status_size = 7,
946 	.invalidate_tlbs_size = 2,
947 };
948 
949 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
950 {
951 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
952 }
953 
954 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
955 {
956 	switch (adev->ip_versions[GC_HWIP][0]) {
957 	case IP_VERSION(9, 0, 1):
958 		soc15_program_register_sequence(adev,
959 						golden_settings_gc_9_0,
960 						ARRAY_SIZE(golden_settings_gc_9_0));
961 		soc15_program_register_sequence(adev,
962 						golden_settings_gc_9_0_vg10,
963 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
964 		break;
965 	case IP_VERSION(9, 2, 1):
966 		soc15_program_register_sequence(adev,
967 						golden_settings_gc_9_2_1,
968 						ARRAY_SIZE(golden_settings_gc_9_2_1));
969 		soc15_program_register_sequence(adev,
970 						golden_settings_gc_9_2_1_vg12,
971 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
972 		break;
973 	case IP_VERSION(9, 4, 0):
974 		soc15_program_register_sequence(adev,
975 						golden_settings_gc_9_0,
976 						ARRAY_SIZE(golden_settings_gc_9_0));
977 		soc15_program_register_sequence(adev,
978 						golden_settings_gc_9_0_vg20,
979 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
980 		break;
981 	case IP_VERSION(9, 4, 1):
982 		soc15_program_register_sequence(adev,
983 						golden_settings_gc_9_4_1_arct,
984 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
985 		break;
986 	case IP_VERSION(9, 2, 2):
987 	case IP_VERSION(9, 1, 0):
988 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
989 						ARRAY_SIZE(golden_settings_gc_9_1));
990 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
991 			soc15_program_register_sequence(adev,
992 							golden_settings_gc_9_1_rv2,
993 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
994 		else
995 			soc15_program_register_sequence(adev,
996 							golden_settings_gc_9_1_rv1,
997 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
998 		break;
999 	 case IP_VERSION(9, 3, 0):
1000 		soc15_program_register_sequence(adev,
1001 						golden_settings_gc_9_1_rn,
1002 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1003 		return; /* for renoir, don't need common goldensetting */
1004 	case IP_VERSION(9, 4, 2):
1005 		gfx_v9_4_2_init_golden_registers(adev,
1006 						 adev->smuio.funcs->get_die_id(adev));
1007 		break;
1008 	default:
1009 		break;
1010 	}
1011 
1012 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1013 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
1014 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1015 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1016 }
1017 
1018 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1019 {
1020 	adev->gfx.scratch.num_reg = 8;
1021 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1022 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1023 }
1024 
1025 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1026 				       bool wc, uint32_t reg, uint32_t val)
1027 {
1028 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1029 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1030 				WRITE_DATA_DST_SEL(0) |
1031 				(wc ? WR_CONFIRM : 0));
1032 	amdgpu_ring_write(ring, reg);
1033 	amdgpu_ring_write(ring, 0);
1034 	amdgpu_ring_write(ring, val);
1035 }
1036 
1037 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1038 				  int mem_space, int opt, uint32_t addr0,
1039 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1040 				  uint32_t inv)
1041 {
1042 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1043 	amdgpu_ring_write(ring,
1044 				 /* memory (1) or register (0) */
1045 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1046 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1047 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1048 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1049 
1050 	if (mem_space)
1051 		BUG_ON(addr0 & 0x3); /* Dword align */
1052 	amdgpu_ring_write(ring, addr0);
1053 	amdgpu_ring_write(ring, addr1);
1054 	amdgpu_ring_write(ring, ref);
1055 	amdgpu_ring_write(ring, mask);
1056 	amdgpu_ring_write(ring, inv); /* poll interval */
1057 }
1058 
1059 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1060 {
1061 	struct amdgpu_device *adev = ring->adev;
1062 	uint32_t scratch;
1063 	uint32_t tmp = 0;
1064 	unsigned i;
1065 	int r;
1066 
1067 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1068 	if (r)
1069 		return r;
1070 
1071 	WREG32(scratch, 0xCAFEDEAD);
1072 	r = amdgpu_ring_alloc(ring, 3);
1073 	if (r)
1074 		goto error_free_scratch;
1075 
1076 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1077 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1078 	amdgpu_ring_write(ring, 0xDEADBEEF);
1079 	amdgpu_ring_commit(ring);
1080 
1081 	for (i = 0; i < adev->usec_timeout; i++) {
1082 		tmp = RREG32(scratch);
1083 		if (tmp == 0xDEADBEEF)
1084 			break;
1085 		udelay(1);
1086 	}
1087 
1088 	if (i >= adev->usec_timeout)
1089 		r = -ETIMEDOUT;
1090 
1091 error_free_scratch:
1092 	amdgpu_gfx_scratch_free(adev, scratch);
1093 	return r;
1094 }
1095 
1096 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1097 {
1098 	struct amdgpu_device *adev = ring->adev;
1099 	struct amdgpu_ib ib;
1100 	struct dma_fence *f = NULL;
1101 
1102 	unsigned index;
1103 	uint64_t gpu_addr;
1104 	uint32_t tmp;
1105 	long r;
1106 
1107 	r = amdgpu_device_wb_get(adev, &index);
1108 	if (r)
1109 		return r;
1110 
1111 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1112 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1113 	memset(&ib, 0, sizeof(ib));
1114 	r = amdgpu_ib_get(adev, NULL, 16,
1115 					AMDGPU_IB_POOL_DIRECT, &ib);
1116 	if (r)
1117 		goto err1;
1118 
1119 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1120 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1121 	ib.ptr[2] = lower_32_bits(gpu_addr);
1122 	ib.ptr[3] = upper_32_bits(gpu_addr);
1123 	ib.ptr[4] = 0xDEADBEEF;
1124 	ib.length_dw = 5;
1125 
1126 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1127 	if (r)
1128 		goto err2;
1129 
1130 	r = dma_fence_wait_timeout(f, false, timeout);
1131 	if (r == 0) {
1132 		r = -ETIMEDOUT;
1133 		goto err2;
1134 	} else if (r < 0) {
1135 		goto err2;
1136 	}
1137 
1138 	tmp = adev->wb.wb[index];
1139 	if (tmp == 0xDEADBEEF)
1140 		r = 0;
1141 	else
1142 		r = -EINVAL;
1143 
1144 err2:
1145 	amdgpu_ib_free(adev, &ib, NULL);
1146 	dma_fence_put(f);
1147 err1:
1148 	amdgpu_device_wb_free(adev, index);
1149 	return r;
1150 }
1151 
1152 
1153 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1154 {
1155 	release_firmware(adev->gfx.pfp_fw);
1156 	adev->gfx.pfp_fw = NULL;
1157 	release_firmware(adev->gfx.me_fw);
1158 	adev->gfx.me_fw = NULL;
1159 	release_firmware(adev->gfx.ce_fw);
1160 	adev->gfx.ce_fw = NULL;
1161 	release_firmware(adev->gfx.rlc_fw);
1162 	adev->gfx.rlc_fw = NULL;
1163 	release_firmware(adev->gfx.mec_fw);
1164 	adev->gfx.mec_fw = NULL;
1165 	release_firmware(adev->gfx.mec2_fw);
1166 	adev->gfx.mec2_fw = NULL;
1167 
1168 	kfree(adev->gfx.rlc.register_list_format);
1169 }
1170 
1171 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1172 {
1173 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1174 
1175 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1176 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1177 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1178 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1179 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1180 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1181 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1182 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1183 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1184 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1185 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1186 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1187 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1188 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1189 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1190 }
1191 
1192 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1193 {
1194 	adev->gfx.me_fw_write_wait = false;
1195 	adev->gfx.mec_fw_write_wait = false;
1196 
1197 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1198 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1199 	    (adev->gfx.mec_feature_version < 46) ||
1200 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1201 	    (adev->gfx.pfp_feature_version < 46)))
1202 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1203 
1204 	switch (adev->ip_versions[GC_HWIP][0]) {
1205 	case IP_VERSION(9, 0, 1):
1206 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1207 		    (adev->gfx.me_feature_version >= 42) &&
1208 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1209 		    (adev->gfx.pfp_feature_version >= 42))
1210 			adev->gfx.me_fw_write_wait = true;
1211 
1212 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1213 		    (adev->gfx.mec_feature_version >= 42))
1214 			adev->gfx.mec_fw_write_wait = true;
1215 		break;
1216 	case IP_VERSION(9, 2, 1):
1217 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1218 		    (adev->gfx.me_feature_version >= 44) &&
1219 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1220 		    (adev->gfx.pfp_feature_version >= 44))
1221 			adev->gfx.me_fw_write_wait = true;
1222 
1223 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1224 		    (adev->gfx.mec_feature_version >= 44))
1225 			adev->gfx.mec_fw_write_wait = true;
1226 		break;
1227 	case IP_VERSION(9, 4, 0):
1228 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1229 		    (adev->gfx.me_feature_version >= 44) &&
1230 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1231 		    (adev->gfx.pfp_feature_version >= 44))
1232 			adev->gfx.me_fw_write_wait = true;
1233 
1234 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1235 		    (adev->gfx.mec_feature_version >= 44))
1236 			adev->gfx.mec_fw_write_wait = true;
1237 		break;
1238 	case IP_VERSION(9, 1, 0):
1239 	case IP_VERSION(9, 2, 2):
1240 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1241 		    (adev->gfx.me_feature_version >= 42) &&
1242 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1243 		    (adev->gfx.pfp_feature_version >= 42))
1244 			adev->gfx.me_fw_write_wait = true;
1245 
1246 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1247 		    (adev->gfx.mec_feature_version >= 42))
1248 			adev->gfx.mec_fw_write_wait = true;
1249 		break;
1250 	default:
1251 		adev->gfx.me_fw_write_wait = true;
1252 		adev->gfx.mec_fw_write_wait = true;
1253 		break;
1254 	}
1255 }
1256 
1257 struct amdgpu_gfxoff_quirk {
1258 	u16 chip_vendor;
1259 	u16 chip_device;
1260 	u16 subsys_vendor;
1261 	u16 subsys_device;
1262 	u8 revision;
1263 };
1264 
1265 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1266 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1267 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1268 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1269 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1270 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1271 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1272 	{ 0, 0, 0, 0, 0 },
1273 };
1274 
1275 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1276 {
1277 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1278 
1279 	while (p && p->chip_device != 0) {
1280 		if (pdev->vendor == p->chip_vendor &&
1281 		    pdev->device == p->chip_device &&
1282 		    pdev->subsystem_vendor == p->subsys_vendor &&
1283 		    pdev->subsystem_device == p->subsys_device &&
1284 		    pdev->revision == p->revision) {
1285 			return true;
1286 		}
1287 		++p;
1288 	}
1289 	return false;
1290 }
1291 
1292 static bool is_raven_kicker(struct amdgpu_device *adev)
1293 {
1294 	if (adev->pm.fw_version >= 0x41e2b)
1295 		return true;
1296 	else
1297 		return false;
1298 }
1299 
1300 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1301 {
1302 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1303 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1304 	    (adev->gfx.me_feature_version >= 52))
1305 		return true;
1306 	else
1307 		return false;
1308 }
1309 
1310 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1311 {
1312 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1313 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1314 
1315 	switch (adev->ip_versions[GC_HWIP][0]) {
1316 	case IP_VERSION(9, 0, 1):
1317 	case IP_VERSION(9, 2, 1):
1318 	case IP_VERSION(9, 4, 0):
1319 		break;
1320 	case IP_VERSION(9, 2, 2):
1321 	case IP_VERSION(9, 1, 0):
1322 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1323 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1324 		    ((!is_raven_kicker(adev) &&
1325 		      adev->gfx.rlc_fw_version < 531) ||
1326 		     (adev->gfx.rlc_feature_version < 1) ||
1327 		     !adev->gfx.rlc.is_rlc_v2_1))
1328 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1329 
1330 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1331 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1332 				AMD_PG_SUPPORT_CP |
1333 				AMD_PG_SUPPORT_RLC_SMU_HS;
1334 		break;
1335 	case IP_VERSION(9, 3, 0):
1336 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1337 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1338 				AMD_PG_SUPPORT_CP |
1339 				AMD_PG_SUPPORT_RLC_SMU_HS;
1340 		break;
1341 	default:
1342 		break;
1343 	}
1344 }
1345 
1346 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1347 					  const char *chip_name)
1348 {
1349 	char fw_name[30];
1350 	int err;
1351 	struct amdgpu_firmware_info *info = NULL;
1352 	const struct common_firmware_header *header = NULL;
1353 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1354 
1355 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1356 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1357 	if (err)
1358 		goto out;
1359 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1360 	if (err)
1361 		goto out;
1362 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1363 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1364 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1365 
1366 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1367 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1368 	if (err)
1369 		goto out;
1370 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1371 	if (err)
1372 		goto out;
1373 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1374 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1375 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1376 
1377 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1378 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1379 	if (err)
1380 		goto out;
1381 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1382 	if (err)
1383 		goto out;
1384 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1385 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1386 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1387 
1388 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1389 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1390 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1391 		info->fw = adev->gfx.pfp_fw;
1392 		header = (const struct common_firmware_header *)info->fw->data;
1393 		adev->firmware.fw_size +=
1394 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1395 
1396 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1397 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1398 		info->fw = adev->gfx.me_fw;
1399 		header = (const struct common_firmware_header *)info->fw->data;
1400 		adev->firmware.fw_size +=
1401 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1402 
1403 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1404 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1405 		info->fw = adev->gfx.ce_fw;
1406 		header = (const struct common_firmware_header *)info->fw->data;
1407 		adev->firmware.fw_size +=
1408 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1409 	}
1410 
1411 out:
1412 	if (err) {
1413 		dev_err(adev->dev,
1414 			"gfx9: Failed to load firmware \"%s\"\n",
1415 			fw_name);
1416 		release_firmware(adev->gfx.pfp_fw);
1417 		adev->gfx.pfp_fw = NULL;
1418 		release_firmware(adev->gfx.me_fw);
1419 		adev->gfx.me_fw = NULL;
1420 		release_firmware(adev->gfx.ce_fw);
1421 		adev->gfx.ce_fw = NULL;
1422 	}
1423 	return err;
1424 }
1425 
1426 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1427 					  const char *chip_name)
1428 {
1429 	char fw_name[30];
1430 	int err;
1431 	struct amdgpu_firmware_info *info = NULL;
1432 	const struct common_firmware_header *header = NULL;
1433 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1434 	unsigned int *tmp = NULL;
1435 	unsigned int i = 0;
1436 	uint16_t version_major;
1437 	uint16_t version_minor;
1438 	uint32_t smu_version;
1439 
1440 	/*
1441 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1442 	 * instead of picasso_rlc.bin.
1443 	 * Judgment method:
1444 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1445 	 *          or revision >= 0xD8 && revision <= 0xDF
1446 	 * otherwise is PCO FP5
1447 	 */
1448 	if (!strcmp(chip_name, "picasso") &&
1449 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1450 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1451 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1452 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1453 		(smu_version >= 0x41e2b))
1454 		/**
1455 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1456 		*/
1457 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1458 	else
1459 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1460 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1461 	if (err)
1462 		goto out;
1463 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1464 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1465 
1466 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1467 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1468 	if (version_major == 2 && version_minor == 1)
1469 		adev->gfx.rlc.is_rlc_v2_1 = true;
1470 
1471 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1472 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1473 	adev->gfx.rlc.save_and_restore_offset =
1474 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1475 	adev->gfx.rlc.clear_state_descriptor_offset =
1476 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1477 	adev->gfx.rlc.avail_scratch_ram_locations =
1478 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1479 	adev->gfx.rlc.reg_restore_list_size =
1480 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1481 	adev->gfx.rlc.reg_list_format_start =
1482 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1483 	adev->gfx.rlc.reg_list_format_separate_start =
1484 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1485 	adev->gfx.rlc.starting_offsets_start =
1486 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1487 	adev->gfx.rlc.reg_list_format_size_bytes =
1488 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1489 	adev->gfx.rlc.reg_list_size_bytes =
1490 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1491 	adev->gfx.rlc.register_list_format =
1492 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1493 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1494 	if (!adev->gfx.rlc.register_list_format) {
1495 		err = -ENOMEM;
1496 		goto out;
1497 	}
1498 
1499 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1500 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1501 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1502 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1503 
1504 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1505 
1506 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1507 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1508 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1509 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1510 
1511 	if (adev->gfx.rlc.is_rlc_v2_1)
1512 		gfx_v9_0_init_rlc_ext_microcode(adev);
1513 
1514 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1515 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1516 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1517 		info->fw = adev->gfx.rlc_fw;
1518 		header = (const struct common_firmware_header *)info->fw->data;
1519 		adev->firmware.fw_size +=
1520 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1521 
1522 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1523 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1524 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1525 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1526 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1527 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1528 			info->fw = adev->gfx.rlc_fw;
1529 			adev->firmware.fw_size +=
1530 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1531 
1532 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1533 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1534 			info->fw = adev->gfx.rlc_fw;
1535 			adev->firmware.fw_size +=
1536 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1537 
1538 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1539 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1540 			info->fw = adev->gfx.rlc_fw;
1541 			adev->firmware.fw_size +=
1542 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1543 		}
1544 	}
1545 
1546 out:
1547 	if (err) {
1548 		dev_err(adev->dev,
1549 			"gfx9: Failed to load firmware \"%s\"\n",
1550 			fw_name);
1551 		release_firmware(adev->gfx.rlc_fw);
1552 		adev->gfx.rlc_fw = NULL;
1553 	}
1554 	return err;
1555 }
1556 
1557 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1558 {
1559 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1560 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1561 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1562 		return false;
1563 
1564 	return true;
1565 }
1566 
1567 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1568 					  const char *chip_name)
1569 {
1570 	char fw_name[30];
1571 	int err;
1572 	struct amdgpu_firmware_info *info = NULL;
1573 	const struct common_firmware_header *header = NULL;
1574 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1575 
1576 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1577 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1578 	if (err)
1579 		goto out;
1580 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1581 	if (err)
1582 		goto out;
1583 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1584 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1585 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1586 
1587 
1588 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1589 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1590 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1591 		if (!err) {
1592 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1593 			if (err)
1594 				goto out;
1595 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1596 			adev->gfx.mec2_fw->data;
1597 			adev->gfx.mec2_fw_version =
1598 			le32_to_cpu(cp_hdr->header.ucode_version);
1599 			adev->gfx.mec2_feature_version =
1600 			le32_to_cpu(cp_hdr->ucode_feature_version);
1601 		} else {
1602 			err = 0;
1603 			adev->gfx.mec2_fw = NULL;
1604 		}
1605 	} else {
1606 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1607 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1608 	}
1609 
1610 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1611 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1612 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1613 		info->fw = adev->gfx.mec_fw;
1614 		header = (const struct common_firmware_header *)info->fw->data;
1615 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1616 		adev->firmware.fw_size +=
1617 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1618 
1619 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1620 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1621 		info->fw = adev->gfx.mec_fw;
1622 		adev->firmware.fw_size +=
1623 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1624 
1625 		if (adev->gfx.mec2_fw) {
1626 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1627 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1628 			info->fw = adev->gfx.mec2_fw;
1629 			header = (const struct common_firmware_header *)info->fw->data;
1630 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1631 			adev->firmware.fw_size +=
1632 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1633 
1634 			/* TODO: Determine if MEC2 JT FW loading can be removed
1635 				 for all GFX V9 asic and above */
1636 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1637 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1638 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1639 				info->fw = adev->gfx.mec2_fw;
1640 				adev->firmware.fw_size +=
1641 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1642 					PAGE_SIZE);
1643 			}
1644 		}
1645 	}
1646 
1647 out:
1648 	gfx_v9_0_check_if_need_gfxoff(adev);
1649 	gfx_v9_0_check_fw_write_wait(adev);
1650 	if (err) {
1651 		dev_err(adev->dev,
1652 			"gfx9: Failed to load firmware \"%s\"\n",
1653 			fw_name);
1654 		release_firmware(adev->gfx.mec_fw);
1655 		adev->gfx.mec_fw = NULL;
1656 		release_firmware(adev->gfx.mec2_fw);
1657 		adev->gfx.mec2_fw = NULL;
1658 	}
1659 	return err;
1660 }
1661 
1662 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1663 {
1664 	const char *chip_name;
1665 	int r;
1666 
1667 	DRM_DEBUG("\n");
1668 
1669 	switch (adev->ip_versions[GC_HWIP][0]) {
1670 	case IP_VERSION(9, 0, 1):
1671 		chip_name = "vega10";
1672 		break;
1673 	case IP_VERSION(9, 2, 1):
1674 		chip_name = "vega12";
1675 		break;
1676 	case IP_VERSION(9, 4, 0):
1677 		chip_name = "vega20";
1678 		break;
1679 	case IP_VERSION(9, 2, 2):
1680 	case IP_VERSION(9, 1, 0):
1681 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1682 			chip_name = "raven2";
1683 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1684 			chip_name = "picasso";
1685 		else
1686 			chip_name = "raven";
1687 		break;
1688 	case IP_VERSION(9, 4, 1):
1689 		chip_name = "arcturus";
1690 		break;
1691 	case IP_VERSION(9, 3, 0):
1692 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1693 			chip_name = "renoir";
1694 		else
1695 			chip_name = "green_sardine";
1696 		break;
1697 	case IP_VERSION(9, 4, 2):
1698 		chip_name = "aldebaran";
1699 		break;
1700 	default:
1701 		BUG();
1702 	}
1703 
1704 	/* No CPG in Arcturus */
1705 	if (adev->gfx.num_gfx_rings) {
1706 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1707 		if (r)
1708 			return r;
1709 	}
1710 
1711 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1712 	if (r)
1713 		return r;
1714 
1715 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1716 	if (r)
1717 		return r;
1718 
1719 	return r;
1720 }
1721 
1722 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1723 {
1724 	u32 count = 0;
1725 	const struct cs_section_def *sect = NULL;
1726 	const struct cs_extent_def *ext = NULL;
1727 
1728 	/* begin clear state */
1729 	count += 2;
1730 	/* context control state */
1731 	count += 3;
1732 
1733 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1734 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1735 			if (sect->id == SECT_CONTEXT)
1736 				count += 2 + ext->reg_count;
1737 			else
1738 				return 0;
1739 		}
1740 	}
1741 
1742 	/* end clear state */
1743 	count += 2;
1744 	/* clear state */
1745 	count += 2;
1746 
1747 	return count;
1748 }
1749 
1750 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1751 				    volatile u32 *buffer)
1752 {
1753 	u32 count = 0, i;
1754 	const struct cs_section_def *sect = NULL;
1755 	const struct cs_extent_def *ext = NULL;
1756 
1757 	if (adev->gfx.rlc.cs_data == NULL)
1758 		return;
1759 	if (buffer == NULL)
1760 		return;
1761 
1762 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1763 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1764 
1765 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1766 	buffer[count++] = cpu_to_le32(0x80000000);
1767 	buffer[count++] = cpu_to_le32(0x80000000);
1768 
1769 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1770 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1771 			if (sect->id == SECT_CONTEXT) {
1772 				buffer[count++] =
1773 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1774 				buffer[count++] = cpu_to_le32(ext->reg_index -
1775 						PACKET3_SET_CONTEXT_REG_START);
1776 				for (i = 0; i < ext->reg_count; i++)
1777 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1778 			} else {
1779 				return;
1780 			}
1781 		}
1782 	}
1783 
1784 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1785 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1786 
1787 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1788 	buffer[count++] = cpu_to_le32(0);
1789 }
1790 
1791 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1792 {
1793 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1794 	uint32_t pg_always_on_cu_num = 2;
1795 	uint32_t always_on_cu_num;
1796 	uint32_t i, j, k;
1797 	uint32_t mask, cu_bitmap, counter;
1798 
1799 	if (adev->flags & AMD_IS_APU)
1800 		always_on_cu_num = 4;
1801 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1802 		always_on_cu_num = 8;
1803 	else
1804 		always_on_cu_num = 12;
1805 
1806 	mutex_lock(&adev->grbm_idx_mutex);
1807 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1808 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1809 			mask = 1;
1810 			cu_bitmap = 0;
1811 			counter = 0;
1812 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1813 
1814 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1815 				if (cu_info->bitmap[i][j] & mask) {
1816 					if (counter == pg_always_on_cu_num)
1817 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1818 					if (counter < always_on_cu_num)
1819 						cu_bitmap |= mask;
1820 					else
1821 						break;
1822 					counter++;
1823 				}
1824 				mask <<= 1;
1825 			}
1826 
1827 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1828 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1829 		}
1830 	}
1831 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1832 	mutex_unlock(&adev->grbm_idx_mutex);
1833 }
1834 
1835 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1836 {
1837 	uint32_t data;
1838 
1839 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1840 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1841 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1842 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1843 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1844 
1845 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1846 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1847 
1848 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1849 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1850 
1851 	mutex_lock(&adev->grbm_idx_mutex);
1852 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1853 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1854 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1855 
1856 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1857 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1858 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1859 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1860 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1861 
1862 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1863 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1864 	data &= 0x0000FFFF;
1865 	data |= 0x00C00000;
1866 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1867 
1868 	/*
1869 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1870 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1871 	 */
1872 
1873 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1874 	 * but used for RLC_LB_CNTL configuration */
1875 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1876 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1877 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1878 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1879 	mutex_unlock(&adev->grbm_idx_mutex);
1880 
1881 	gfx_v9_0_init_always_on_cu_mask(adev);
1882 }
1883 
1884 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1885 {
1886 	uint32_t data;
1887 
1888 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1889 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1890 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1891 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1892 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1893 
1894 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1895 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1896 
1897 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1898 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1899 
1900 	mutex_lock(&adev->grbm_idx_mutex);
1901 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1902 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1903 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1904 
1905 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1906 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1907 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1908 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1909 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1910 
1911 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1912 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1913 	data &= 0x0000FFFF;
1914 	data |= 0x00C00000;
1915 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1916 
1917 	/*
1918 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1919 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1920 	 */
1921 
1922 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1923 	 * but used for RLC_LB_CNTL configuration */
1924 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1925 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1926 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1927 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1928 	mutex_unlock(&adev->grbm_idx_mutex);
1929 
1930 	gfx_v9_0_init_always_on_cu_mask(adev);
1931 }
1932 
1933 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1934 {
1935 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1936 }
1937 
1938 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1939 {
1940 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1941 		return 5;
1942 	else
1943 		return 4;
1944 }
1945 
1946 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1947 {
1948 	const struct cs_section_def *cs_data;
1949 	int r;
1950 
1951 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1952 
1953 	cs_data = adev->gfx.rlc.cs_data;
1954 
1955 	if (cs_data) {
1956 		/* init clear state block */
1957 		r = amdgpu_gfx_rlc_init_csb(adev);
1958 		if (r)
1959 			return r;
1960 	}
1961 
1962 	if (adev->flags & AMD_IS_APU) {
1963 		/* TODO: double check the cp_table_size for RV */
1964 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1965 		r = amdgpu_gfx_rlc_init_cpt(adev);
1966 		if (r)
1967 			return r;
1968 	}
1969 
1970 	switch (adev->ip_versions[GC_HWIP][0]) {
1971 	case IP_VERSION(9, 2, 2):
1972 	case IP_VERSION(9, 1, 0):
1973 		gfx_v9_0_init_lbpw(adev);
1974 		break;
1975 	case IP_VERSION(9, 4, 0):
1976 		gfx_v9_4_init_lbpw(adev);
1977 		break;
1978 	default:
1979 		break;
1980 	}
1981 
1982 	/* init spm vmid with 0xf */
1983 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1984 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1985 
1986 	return 0;
1987 }
1988 
1989 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1990 {
1991 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1992 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1993 }
1994 
1995 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1996 {
1997 	int r;
1998 	u32 *hpd;
1999 	const __le32 *fw_data;
2000 	unsigned fw_size;
2001 	u32 *fw;
2002 	size_t mec_hpd_size;
2003 
2004 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2005 
2006 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2007 
2008 	/* take ownership of the relevant compute queues */
2009 	amdgpu_gfx_compute_queue_acquire(adev);
2010 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2011 	if (mec_hpd_size) {
2012 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2013 					      AMDGPU_GEM_DOMAIN_VRAM,
2014 					      &adev->gfx.mec.hpd_eop_obj,
2015 					      &adev->gfx.mec.hpd_eop_gpu_addr,
2016 					      (void **)&hpd);
2017 		if (r) {
2018 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
2019 			gfx_v9_0_mec_fini(adev);
2020 			return r;
2021 		}
2022 
2023 		memset(hpd, 0, mec_hpd_size);
2024 
2025 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2026 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2027 	}
2028 
2029 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2030 
2031 	fw_data = (const __le32 *)
2032 		(adev->gfx.mec_fw->data +
2033 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2034 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2035 
2036 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2037 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2038 				      &adev->gfx.mec.mec_fw_obj,
2039 				      &adev->gfx.mec.mec_fw_gpu_addr,
2040 				      (void **)&fw);
2041 	if (r) {
2042 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2043 		gfx_v9_0_mec_fini(adev);
2044 		return r;
2045 	}
2046 
2047 	memcpy(fw, fw_data, fw_size);
2048 
2049 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2050 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2051 
2052 	return 0;
2053 }
2054 
2055 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2056 {
2057 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2058 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2059 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2060 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2061 		(SQ_IND_INDEX__FORCE_READ_MASK));
2062 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2063 }
2064 
2065 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2066 			   uint32_t wave, uint32_t thread,
2067 			   uint32_t regno, uint32_t num, uint32_t *out)
2068 {
2069 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2070 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2071 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2072 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2073 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2074 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2075 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2076 	while (num--)
2077 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2078 }
2079 
2080 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2081 {
2082 	/* type 1 wave data */
2083 	dst[(*no_fields)++] = 1;
2084 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2085 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2086 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2087 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2088 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2089 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2090 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2091 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2092 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2093 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2094 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2095 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2096 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2097 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2098 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2099 }
2100 
2101 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2102 				     uint32_t wave, uint32_t start,
2103 				     uint32_t size, uint32_t *dst)
2104 {
2105 	wave_read_regs(
2106 		adev, simd, wave, 0,
2107 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2108 }
2109 
2110 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2111 				     uint32_t wave, uint32_t thread,
2112 				     uint32_t start, uint32_t size,
2113 				     uint32_t *dst)
2114 {
2115 	wave_read_regs(
2116 		adev, simd, wave, thread,
2117 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2118 }
2119 
2120 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2121 				  u32 me, u32 pipe, u32 q, u32 vm)
2122 {
2123 	soc15_grbm_select(adev, me, pipe, q, vm);
2124 }
2125 
2126 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2127         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2128         .select_se_sh = &gfx_v9_0_select_se_sh,
2129         .read_wave_data = &gfx_v9_0_read_wave_data,
2130         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2131         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2132         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2133 };
2134 
2135 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2136 	.ras_late_init = amdgpu_gfx_ras_late_init,
2137 	.ras_fini = amdgpu_gfx_ras_fini,
2138 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2139 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2140 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2141 };
2142 
2143 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2144 {
2145 	u32 gb_addr_config;
2146 	int err;
2147 
2148 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2149 
2150 	switch (adev->ip_versions[GC_HWIP][0]) {
2151 	case IP_VERSION(9, 0, 1):
2152 		adev->gfx.config.max_hw_contexts = 8;
2153 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2154 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2155 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2156 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2157 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2158 		break;
2159 	case IP_VERSION(9, 2, 1):
2160 		adev->gfx.config.max_hw_contexts = 8;
2161 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2162 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2163 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2164 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2165 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2166 		DRM_INFO("fix gfx.config for vega12\n");
2167 		break;
2168 	case IP_VERSION(9, 4, 0):
2169 		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2170 		adev->gfx.config.max_hw_contexts = 8;
2171 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2172 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2173 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2174 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2175 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2176 		gb_addr_config &= ~0xf3e777ff;
2177 		gb_addr_config |= 0x22014042;
2178 		/* check vbios table if gpu info is not available */
2179 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2180 		if (err)
2181 			return err;
2182 		break;
2183 	case IP_VERSION(9, 2, 2):
2184 	case IP_VERSION(9, 1, 0):
2185 		adev->gfx.config.max_hw_contexts = 8;
2186 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2187 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2188 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2189 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2190 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2191 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2192 		else
2193 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2194 		break;
2195 	case IP_VERSION(9, 4, 1):
2196 		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2197 		adev->gfx.config.max_hw_contexts = 8;
2198 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2199 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2200 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2201 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2202 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2203 		gb_addr_config &= ~0xf3e777ff;
2204 		gb_addr_config |= 0x22014042;
2205 		break;
2206 	case IP_VERSION(9, 3, 0):
2207 		adev->gfx.config.max_hw_contexts = 8;
2208 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2209 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2210 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2211 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2212 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2213 		gb_addr_config &= ~0xf3e777ff;
2214 		gb_addr_config |= 0x22010042;
2215 		break;
2216 	case IP_VERSION(9, 4, 2):
2217 		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2218 		adev->gfx.config.max_hw_contexts = 8;
2219 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2220 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2221 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2222 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2223 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2224 		gb_addr_config &= ~0xf3e777ff;
2225 		gb_addr_config |= 0x22014042;
2226 		/* check vbios table if gpu info is not available */
2227 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2228 		if (err)
2229 			return err;
2230 		break;
2231 	default:
2232 		BUG();
2233 		break;
2234 	}
2235 
2236 	adev->gfx.config.gb_addr_config = gb_addr_config;
2237 
2238 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2239 			REG_GET_FIELD(
2240 					adev->gfx.config.gb_addr_config,
2241 					GB_ADDR_CONFIG,
2242 					NUM_PIPES);
2243 
2244 	adev->gfx.config.max_tile_pipes =
2245 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2246 
2247 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2248 			REG_GET_FIELD(
2249 					adev->gfx.config.gb_addr_config,
2250 					GB_ADDR_CONFIG,
2251 					NUM_BANKS);
2252 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2253 			REG_GET_FIELD(
2254 					adev->gfx.config.gb_addr_config,
2255 					GB_ADDR_CONFIG,
2256 					MAX_COMPRESSED_FRAGS);
2257 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2258 			REG_GET_FIELD(
2259 					adev->gfx.config.gb_addr_config,
2260 					GB_ADDR_CONFIG,
2261 					NUM_RB_PER_SE);
2262 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2263 			REG_GET_FIELD(
2264 					adev->gfx.config.gb_addr_config,
2265 					GB_ADDR_CONFIG,
2266 					NUM_SHADER_ENGINES);
2267 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2268 			REG_GET_FIELD(
2269 					adev->gfx.config.gb_addr_config,
2270 					GB_ADDR_CONFIG,
2271 					PIPE_INTERLEAVE_SIZE));
2272 
2273 	return 0;
2274 }
2275 
2276 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2277 				      int mec, int pipe, int queue)
2278 {
2279 	unsigned irq_type;
2280 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2281 	unsigned int hw_prio;
2282 
2283 	ring = &adev->gfx.compute_ring[ring_id];
2284 
2285 	/* mec0 is me1 */
2286 	ring->me = mec + 1;
2287 	ring->pipe = pipe;
2288 	ring->queue = queue;
2289 
2290 	ring->ring_obj = NULL;
2291 	ring->use_doorbell = true;
2292 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2293 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2294 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2295 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2296 
2297 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2298 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2299 		+ ring->pipe;
2300 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2301 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2302 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2303 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2304 				hw_prio, NULL);
2305 }
2306 
2307 static int gfx_v9_0_sw_init(void *handle)
2308 {
2309 	int i, j, k, r, ring_id;
2310 	struct amdgpu_ring *ring;
2311 	struct amdgpu_kiq *kiq;
2312 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2313 
2314 	switch (adev->ip_versions[GC_HWIP][0]) {
2315 	case IP_VERSION(9, 0, 1):
2316 	case IP_VERSION(9, 2, 1):
2317 	case IP_VERSION(9, 4, 0):
2318 	case IP_VERSION(9, 2, 2):
2319 	case IP_VERSION(9, 1, 0):
2320 	case IP_VERSION(9, 4, 1):
2321 	case IP_VERSION(9, 3, 0):
2322 	case IP_VERSION(9, 4, 2):
2323 		adev->gfx.mec.num_mec = 2;
2324 		break;
2325 	default:
2326 		adev->gfx.mec.num_mec = 1;
2327 		break;
2328 	}
2329 
2330 	adev->gfx.mec.num_pipe_per_mec = 4;
2331 	adev->gfx.mec.num_queue_per_pipe = 8;
2332 
2333 	/* EOP Event */
2334 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2335 	if (r)
2336 		return r;
2337 
2338 	/* Privileged reg */
2339 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2340 			      &adev->gfx.priv_reg_irq);
2341 	if (r)
2342 		return r;
2343 
2344 	/* Privileged inst */
2345 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2346 			      &adev->gfx.priv_inst_irq);
2347 	if (r)
2348 		return r;
2349 
2350 	/* ECC error */
2351 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2352 			      &adev->gfx.cp_ecc_error_irq);
2353 	if (r)
2354 		return r;
2355 
2356 	/* FUE error */
2357 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2358 			      &adev->gfx.cp_ecc_error_irq);
2359 	if (r)
2360 		return r;
2361 
2362 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2363 
2364 	gfx_v9_0_scratch_init(adev);
2365 
2366 	r = gfx_v9_0_init_microcode(adev);
2367 	if (r) {
2368 		DRM_ERROR("Failed to load gfx firmware!\n");
2369 		return r;
2370 	}
2371 
2372 	r = adev->gfx.rlc.funcs->init(adev);
2373 	if (r) {
2374 		DRM_ERROR("Failed to init rlc BOs!\n");
2375 		return r;
2376 	}
2377 
2378 	r = gfx_v9_0_mec_init(adev);
2379 	if (r) {
2380 		DRM_ERROR("Failed to init MEC BOs!\n");
2381 		return r;
2382 	}
2383 
2384 	/* set up the gfx ring */
2385 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2386 		ring = &adev->gfx.gfx_ring[i];
2387 		ring->ring_obj = NULL;
2388 		if (!i)
2389 			sprintf(ring->name, "gfx");
2390 		else
2391 			sprintf(ring->name, "gfx_%d", i);
2392 		ring->use_doorbell = true;
2393 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2394 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2395 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2396 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2397 		if (r)
2398 			return r;
2399 	}
2400 
2401 	/* set up the compute queues - allocate horizontally across pipes */
2402 	ring_id = 0;
2403 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2404 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2405 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2406 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2407 					continue;
2408 
2409 				r = gfx_v9_0_compute_ring_init(adev,
2410 							       ring_id,
2411 							       i, k, j);
2412 				if (r)
2413 					return r;
2414 
2415 				ring_id++;
2416 			}
2417 		}
2418 	}
2419 
2420 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2421 	if (r) {
2422 		DRM_ERROR("Failed to init KIQ BOs!\n");
2423 		return r;
2424 	}
2425 
2426 	kiq = &adev->gfx.kiq;
2427 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2428 	if (r)
2429 		return r;
2430 
2431 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2432 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2433 	if (r)
2434 		return r;
2435 
2436 	adev->gfx.ce_ram_size = 0x8000;
2437 
2438 	r = gfx_v9_0_gpu_early_init(adev);
2439 	if (r)
2440 		return r;
2441 
2442 	return 0;
2443 }
2444 
2445 
2446 static int gfx_v9_0_sw_fini(void *handle)
2447 {
2448 	int i;
2449 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2450 
2451 	if (adev->gfx.ras_funcs &&
2452 	    adev->gfx.ras_funcs->ras_fini)
2453 		adev->gfx.ras_funcs->ras_fini(adev);
2454 
2455 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2456 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2457 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2458 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2459 
2460 	amdgpu_gfx_mqd_sw_fini(adev);
2461 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2462 	amdgpu_gfx_kiq_fini(adev);
2463 
2464 	gfx_v9_0_mec_fini(adev);
2465 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2466 	if (adev->flags & AMD_IS_APU) {
2467 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2468 				&adev->gfx.rlc.cp_table_gpu_addr,
2469 				(void **)&adev->gfx.rlc.cp_table_ptr);
2470 	}
2471 	gfx_v9_0_free_microcode(adev);
2472 
2473 	return 0;
2474 }
2475 
2476 
2477 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2478 {
2479 	/* TODO */
2480 }
2481 
2482 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2483 			   u32 instance)
2484 {
2485 	u32 data;
2486 
2487 	if (instance == 0xffffffff)
2488 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2489 	else
2490 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2491 
2492 	if (se_num == 0xffffffff)
2493 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2494 	else
2495 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2496 
2497 	if (sh_num == 0xffffffff)
2498 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2499 	else
2500 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2501 
2502 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2503 }
2504 
2505 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2506 {
2507 	u32 data, mask;
2508 
2509 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2510 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2511 
2512 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2513 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2514 
2515 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2516 					 adev->gfx.config.max_sh_per_se);
2517 
2518 	return (~data) & mask;
2519 }
2520 
2521 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2522 {
2523 	int i, j;
2524 	u32 data;
2525 	u32 active_rbs = 0;
2526 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2527 					adev->gfx.config.max_sh_per_se;
2528 
2529 	mutex_lock(&adev->grbm_idx_mutex);
2530 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2531 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2532 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2533 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2534 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2535 					       rb_bitmap_width_per_sh);
2536 		}
2537 	}
2538 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2539 	mutex_unlock(&adev->grbm_idx_mutex);
2540 
2541 	adev->gfx.config.backend_enable_mask = active_rbs;
2542 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2543 }
2544 
2545 #define DEFAULT_SH_MEM_BASES	(0x6000)
2546 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2547 {
2548 	int i;
2549 	uint32_t sh_mem_config;
2550 	uint32_t sh_mem_bases;
2551 
2552 	/*
2553 	 * Configure apertures:
2554 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2555 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2556 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2557 	 */
2558 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2559 
2560 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2561 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2562 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2563 
2564 	mutex_lock(&adev->srbm_mutex);
2565 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2566 		soc15_grbm_select(adev, 0, 0, 0, i);
2567 		/* CP and shaders */
2568 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2569 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2570 	}
2571 	soc15_grbm_select(adev, 0, 0, 0, 0);
2572 	mutex_unlock(&adev->srbm_mutex);
2573 
2574 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2575 	   acccess. These should be enabled by FW for target VMIDs. */
2576 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2577 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2578 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2579 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2580 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2581 	}
2582 }
2583 
2584 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2585 {
2586 	int vmid;
2587 
2588 	/*
2589 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2590 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2591 	 * the driver can enable them for graphics. VMID0 should maintain
2592 	 * access so that HWS firmware can save/restore entries.
2593 	 */
2594 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2595 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2596 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2597 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2598 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2599 	}
2600 }
2601 
2602 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2603 {
2604 	uint32_t tmp;
2605 
2606 	switch (adev->ip_versions[GC_HWIP][0]) {
2607 	case IP_VERSION(9, 4, 1):
2608 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2609 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2610 					DISABLE_BARRIER_WAITCNT, 1);
2611 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2612 		break;
2613 	default:
2614 		break;
2615 	}
2616 }
2617 
2618 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2619 {
2620 	u32 tmp;
2621 	int i;
2622 
2623 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2624 
2625 	gfx_v9_0_tiling_mode_table_init(adev);
2626 
2627 	gfx_v9_0_setup_rb(adev);
2628 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2629 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2630 
2631 	/* XXX SH_MEM regs */
2632 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2633 	mutex_lock(&adev->srbm_mutex);
2634 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2635 		soc15_grbm_select(adev, 0, 0, 0, i);
2636 		/* CP and shaders */
2637 		if (i == 0) {
2638 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2639 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2640 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2641 					    !!adev->gmc.noretry);
2642 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2643 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2644 		} else {
2645 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648 					    !!adev->gmc.noretry);
2649 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2651 				(adev->gmc.private_aperture_start >> 48));
2652 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2653 				(adev->gmc.shared_aperture_start >> 48));
2654 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2655 		}
2656 	}
2657 	soc15_grbm_select(adev, 0, 0, 0, 0);
2658 
2659 	mutex_unlock(&adev->srbm_mutex);
2660 
2661 	gfx_v9_0_init_compute_vmid(adev);
2662 	gfx_v9_0_init_gds_vmid(adev);
2663 	gfx_v9_0_init_sq_config(adev);
2664 }
2665 
2666 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2667 {
2668 	u32 i, j, k;
2669 	u32 mask;
2670 
2671 	mutex_lock(&adev->grbm_idx_mutex);
2672 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2673 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2674 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2675 			for (k = 0; k < adev->usec_timeout; k++) {
2676 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2677 					break;
2678 				udelay(1);
2679 			}
2680 			if (k == adev->usec_timeout) {
2681 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2682 						      0xffffffff, 0xffffffff);
2683 				mutex_unlock(&adev->grbm_idx_mutex);
2684 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2685 					 i, j);
2686 				return;
2687 			}
2688 		}
2689 	}
2690 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2691 	mutex_unlock(&adev->grbm_idx_mutex);
2692 
2693 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2694 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2695 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2696 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2697 	for (k = 0; k < adev->usec_timeout; k++) {
2698 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2699 			break;
2700 		udelay(1);
2701 	}
2702 }
2703 
2704 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2705 					       bool enable)
2706 {
2707 	u32 tmp;
2708 
2709 	/* These interrupts should be enabled to drive DS clock */
2710 
2711 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2712 
2713 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2714 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2715 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 	if(adev->gfx.num_gfx_rings)
2717 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2718 
2719 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2720 }
2721 
2722 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2723 {
2724 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2725 	/* csib */
2726 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2727 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2728 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2729 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2730 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2731 			adev->gfx.rlc.clear_state_size);
2732 }
2733 
2734 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2735 				int indirect_offset,
2736 				int list_size,
2737 				int *unique_indirect_regs,
2738 				int unique_indirect_reg_count,
2739 				int *indirect_start_offsets,
2740 				int *indirect_start_offsets_count,
2741 				int max_start_offsets_count)
2742 {
2743 	int idx;
2744 
2745 	for (; indirect_offset < list_size; indirect_offset++) {
2746 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2747 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2748 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2749 
2750 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2751 			indirect_offset += 2;
2752 
2753 			/* look for the matching indice */
2754 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2755 				if (unique_indirect_regs[idx] ==
2756 					register_list_format[indirect_offset] ||
2757 					!unique_indirect_regs[idx])
2758 					break;
2759 			}
2760 
2761 			BUG_ON(idx >= unique_indirect_reg_count);
2762 
2763 			if (!unique_indirect_regs[idx])
2764 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2765 
2766 			indirect_offset++;
2767 		}
2768 	}
2769 }
2770 
2771 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2772 {
2773 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2774 	int unique_indirect_reg_count = 0;
2775 
2776 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2777 	int indirect_start_offsets_count = 0;
2778 
2779 	int list_size = 0;
2780 	int i = 0, j = 0;
2781 	u32 tmp = 0;
2782 
2783 	u32 *register_list_format =
2784 		kmemdup(adev->gfx.rlc.register_list_format,
2785 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2786 	if (!register_list_format)
2787 		return -ENOMEM;
2788 
2789 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2790 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2791 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2792 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2793 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2794 				    unique_indirect_regs,
2795 				    unique_indirect_reg_count,
2796 				    indirect_start_offsets,
2797 				    &indirect_start_offsets_count,
2798 				    ARRAY_SIZE(indirect_start_offsets));
2799 
2800 	/* enable auto inc in case it is disabled */
2801 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2802 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2803 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2804 
2805 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2806 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2807 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2808 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2809 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2810 			adev->gfx.rlc.register_restore[i]);
2811 
2812 	/* load indirect register */
2813 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2814 		adev->gfx.rlc.reg_list_format_start);
2815 
2816 	/* direct register portion */
2817 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2818 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2819 			register_list_format[i]);
2820 
2821 	/* indirect register portion */
2822 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2823 		if (register_list_format[i] == 0xFFFFFFFF) {
2824 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2825 			continue;
2826 		}
2827 
2828 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2829 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2830 
2831 		for (j = 0; j < unique_indirect_reg_count; j++) {
2832 			if (register_list_format[i] == unique_indirect_regs[j]) {
2833 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2834 				break;
2835 			}
2836 		}
2837 
2838 		BUG_ON(j >= unique_indirect_reg_count);
2839 
2840 		i++;
2841 	}
2842 
2843 	/* set save/restore list size */
2844 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2845 	list_size = list_size >> 1;
2846 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2847 		adev->gfx.rlc.reg_restore_list_size);
2848 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2849 
2850 	/* write the starting offsets to RLC scratch ram */
2851 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2852 		adev->gfx.rlc.starting_offsets_start);
2853 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2854 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2855 		       indirect_start_offsets[i]);
2856 
2857 	/* load unique indirect regs*/
2858 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2859 		if (unique_indirect_regs[i] != 0) {
2860 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2861 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2862 			       unique_indirect_regs[i] & 0x3FFFF);
2863 
2864 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2865 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2866 			       unique_indirect_regs[i] >> 20);
2867 		}
2868 	}
2869 
2870 	kfree(register_list_format);
2871 	return 0;
2872 }
2873 
2874 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2875 {
2876 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2877 }
2878 
2879 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2880 					     bool enable)
2881 {
2882 	uint32_t data = 0;
2883 	uint32_t default_data = 0;
2884 
2885 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2886 	if (enable) {
2887 		/* enable GFXIP control over CGPG */
2888 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2889 		if(default_data != data)
2890 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2891 
2892 		/* update status */
2893 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2894 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2895 		if(default_data != data)
2896 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2897 	} else {
2898 		/* restore GFXIP control over GCPG */
2899 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2900 		if(default_data != data)
2901 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2902 	}
2903 }
2904 
2905 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2906 {
2907 	uint32_t data = 0;
2908 
2909 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2910 			      AMD_PG_SUPPORT_GFX_SMG |
2911 			      AMD_PG_SUPPORT_GFX_DMG)) {
2912 		/* init IDLE_POLL_COUNT = 60 */
2913 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2914 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2915 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2916 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2917 
2918 		/* init RLC PG Delay */
2919 		data = 0;
2920 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2921 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2922 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2923 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2924 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2925 
2926 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2927 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2928 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2929 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2930 
2931 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2932 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2933 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2934 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2935 
2936 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2937 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2938 
2939 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2940 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2941 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2942 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2943 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2944 	}
2945 }
2946 
2947 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2948 						bool enable)
2949 {
2950 	uint32_t data = 0;
2951 	uint32_t default_data = 0;
2952 
2953 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2954 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2955 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2956 			     enable ? 1 : 0);
2957 	if (default_data != data)
2958 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2959 }
2960 
2961 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2962 						bool enable)
2963 {
2964 	uint32_t data = 0;
2965 	uint32_t default_data = 0;
2966 
2967 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2968 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2969 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2970 			     enable ? 1 : 0);
2971 	if(default_data != data)
2972 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2973 }
2974 
2975 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2976 					bool enable)
2977 {
2978 	uint32_t data = 0;
2979 	uint32_t default_data = 0;
2980 
2981 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2982 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2983 			     CP_PG_DISABLE,
2984 			     enable ? 0 : 1);
2985 	if(default_data != data)
2986 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2987 }
2988 
2989 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2990 						bool enable)
2991 {
2992 	uint32_t data, default_data;
2993 
2994 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2995 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2996 			     GFX_POWER_GATING_ENABLE,
2997 			     enable ? 1 : 0);
2998 	if(default_data != data)
2999 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3000 }
3001 
3002 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3003 						bool enable)
3004 {
3005 	uint32_t data, default_data;
3006 
3007 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3008 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3009 			     GFX_PIPELINE_PG_ENABLE,
3010 			     enable ? 1 : 0);
3011 	if(default_data != data)
3012 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3013 
3014 	if (!enable)
3015 		/* read any GFX register to wake up GFX */
3016 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3017 }
3018 
3019 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3020 						       bool enable)
3021 {
3022 	uint32_t data, default_data;
3023 
3024 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3025 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3026 			     STATIC_PER_CU_PG_ENABLE,
3027 			     enable ? 1 : 0);
3028 	if(default_data != data)
3029 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3030 }
3031 
3032 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3033 						bool enable)
3034 {
3035 	uint32_t data, default_data;
3036 
3037 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3038 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3039 			     DYN_PER_CU_PG_ENABLE,
3040 			     enable ? 1 : 0);
3041 	if(default_data != data)
3042 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3043 }
3044 
3045 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3046 {
3047 	gfx_v9_0_init_csb(adev);
3048 
3049 	/*
3050 	 * Rlc save restore list is workable since v2_1.
3051 	 * And it's needed by gfxoff feature.
3052 	 */
3053 	if (adev->gfx.rlc.is_rlc_v2_1) {
3054 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3055 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3056 			gfx_v9_1_init_rlc_save_restore_list(adev);
3057 		gfx_v9_0_enable_save_restore_machine(adev);
3058 	}
3059 
3060 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3061 			      AMD_PG_SUPPORT_GFX_SMG |
3062 			      AMD_PG_SUPPORT_GFX_DMG |
3063 			      AMD_PG_SUPPORT_CP |
3064 			      AMD_PG_SUPPORT_GDS |
3065 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3066 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
3067 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
3068 		gfx_v9_0_init_gfx_power_gating(adev);
3069 	}
3070 }
3071 
3072 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3073 {
3074 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3075 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3076 	gfx_v9_0_wait_for_rlc_serdes(adev);
3077 }
3078 
3079 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3080 {
3081 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3082 	udelay(50);
3083 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3084 	udelay(50);
3085 }
3086 
3087 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3088 {
3089 #ifdef AMDGPU_RLC_DEBUG_RETRY
3090 	u32 rlc_ucode_ver;
3091 #endif
3092 
3093 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3094 	udelay(50);
3095 
3096 	/* carrizo do enable cp interrupt after cp inited */
3097 	if (!(adev->flags & AMD_IS_APU)) {
3098 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3099 		udelay(50);
3100 	}
3101 
3102 #ifdef AMDGPU_RLC_DEBUG_RETRY
3103 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3104 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3105 	if(rlc_ucode_ver == 0x108) {
3106 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3107 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3108 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3109 		 * default is 0x9C4 to create a 100us interval */
3110 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3111 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3112 		 * to disable the page fault retry interrupts, default is
3113 		 * 0x100 (256) */
3114 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3115 	}
3116 #endif
3117 }
3118 
3119 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3120 {
3121 	const struct rlc_firmware_header_v2_0 *hdr;
3122 	const __le32 *fw_data;
3123 	unsigned i, fw_size;
3124 
3125 	if (!adev->gfx.rlc_fw)
3126 		return -EINVAL;
3127 
3128 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3129 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3130 
3131 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3132 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3133 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3134 
3135 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3136 			RLCG_UCODE_LOADING_START_ADDRESS);
3137 	for (i = 0; i < fw_size; i++)
3138 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3139 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3140 
3141 	return 0;
3142 }
3143 
3144 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3145 {
3146 	int r;
3147 
3148 	if (amdgpu_sriov_vf(adev)) {
3149 		gfx_v9_0_init_csb(adev);
3150 		return 0;
3151 	}
3152 
3153 	adev->gfx.rlc.funcs->stop(adev);
3154 
3155 	/* disable CG */
3156 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3157 
3158 	gfx_v9_0_init_pg(adev);
3159 
3160 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3161 		/* legacy rlc firmware loading */
3162 		r = gfx_v9_0_rlc_load_microcode(adev);
3163 		if (r)
3164 			return r;
3165 	}
3166 
3167 	switch (adev->ip_versions[GC_HWIP][0]) {
3168 	case IP_VERSION(9, 2, 2):
3169 	case IP_VERSION(9, 1, 0):
3170 		if (amdgpu_lbpw == 0)
3171 			gfx_v9_0_enable_lbpw(adev, false);
3172 		else
3173 			gfx_v9_0_enable_lbpw(adev, true);
3174 		break;
3175 	case IP_VERSION(9, 4, 0):
3176 		if (amdgpu_lbpw > 0)
3177 			gfx_v9_0_enable_lbpw(adev, true);
3178 		else
3179 			gfx_v9_0_enable_lbpw(adev, false);
3180 		break;
3181 	default:
3182 		break;
3183 	}
3184 
3185 	adev->gfx.rlc.funcs->start(adev);
3186 
3187 	return 0;
3188 }
3189 
3190 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3191 {
3192 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3193 
3194 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3195 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3196 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3197 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3198 	udelay(50);
3199 }
3200 
3201 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3202 {
3203 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3204 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3205 	const struct gfx_firmware_header_v1_0 *me_hdr;
3206 	const __le32 *fw_data;
3207 	unsigned i, fw_size;
3208 
3209 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3210 		return -EINVAL;
3211 
3212 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3213 		adev->gfx.pfp_fw->data;
3214 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3215 		adev->gfx.ce_fw->data;
3216 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3217 		adev->gfx.me_fw->data;
3218 
3219 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3220 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3221 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3222 
3223 	gfx_v9_0_cp_gfx_enable(adev, false);
3224 
3225 	/* PFP */
3226 	fw_data = (const __le32 *)
3227 		(adev->gfx.pfp_fw->data +
3228 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3229 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3230 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3231 	for (i = 0; i < fw_size; i++)
3232 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3233 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3234 
3235 	/* CE */
3236 	fw_data = (const __le32 *)
3237 		(adev->gfx.ce_fw->data +
3238 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3239 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3240 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3241 	for (i = 0; i < fw_size; i++)
3242 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3243 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3244 
3245 	/* ME */
3246 	fw_data = (const __le32 *)
3247 		(adev->gfx.me_fw->data +
3248 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3249 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3250 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3251 	for (i = 0; i < fw_size; i++)
3252 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3253 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3254 
3255 	return 0;
3256 }
3257 
3258 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3259 {
3260 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3261 	const struct cs_section_def *sect = NULL;
3262 	const struct cs_extent_def *ext = NULL;
3263 	int r, i, tmp;
3264 
3265 	/* init the CP */
3266 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3267 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3268 
3269 	gfx_v9_0_cp_gfx_enable(adev, true);
3270 
3271 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3272 	if (r) {
3273 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3274 		return r;
3275 	}
3276 
3277 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3278 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3279 
3280 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3281 	amdgpu_ring_write(ring, 0x80000000);
3282 	amdgpu_ring_write(ring, 0x80000000);
3283 
3284 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3285 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3286 			if (sect->id == SECT_CONTEXT) {
3287 				amdgpu_ring_write(ring,
3288 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3289 					       ext->reg_count));
3290 				amdgpu_ring_write(ring,
3291 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3292 				for (i = 0; i < ext->reg_count; i++)
3293 					amdgpu_ring_write(ring, ext->extent[i]);
3294 			}
3295 		}
3296 	}
3297 
3298 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3299 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3300 
3301 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3302 	amdgpu_ring_write(ring, 0);
3303 
3304 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3305 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3306 	amdgpu_ring_write(ring, 0x8000);
3307 	amdgpu_ring_write(ring, 0x8000);
3308 
3309 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3310 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3311 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3312 	amdgpu_ring_write(ring, tmp);
3313 	amdgpu_ring_write(ring, 0);
3314 
3315 	amdgpu_ring_commit(ring);
3316 
3317 	return 0;
3318 }
3319 
3320 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3321 {
3322 	struct amdgpu_ring *ring;
3323 	u32 tmp;
3324 	u32 rb_bufsz;
3325 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3326 
3327 	/* Set the write pointer delay */
3328 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3329 
3330 	/* set the RB to use vmid 0 */
3331 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3332 
3333 	/* Set ring buffer size */
3334 	ring = &adev->gfx.gfx_ring[0];
3335 	rb_bufsz = order_base_2(ring->ring_size / 8);
3336 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3337 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3338 #ifdef __BIG_ENDIAN
3339 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3340 #endif
3341 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3342 
3343 	/* Initialize the ring buffer's write pointers */
3344 	ring->wptr = 0;
3345 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3346 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3347 
3348 	/* set the wb address wether it's enabled or not */
3349 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3350 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3351 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3352 
3353 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3354 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3355 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3356 
3357 	mdelay(1);
3358 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3359 
3360 	rb_addr = ring->gpu_addr >> 8;
3361 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3362 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3363 
3364 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3365 	if (ring->use_doorbell) {
3366 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3367 				    DOORBELL_OFFSET, ring->doorbell_index);
3368 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3369 				    DOORBELL_EN, 1);
3370 	} else {
3371 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3372 	}
3373 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3374 
3375 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3376 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3377 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3378 
3379 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3380 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3381 
3382 
3383 	/* start the ring */
3384 	gfx_v9_0_cp_gfx_start(adev);
3385 	ring->sched.ready = true;
3386 
3387 	return 0;
3388 }
3389 
3390 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3391 {
3392 	if (enable) {
3393 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3394 	} else {
3395 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3396 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3397 		adev->gfx.kiq.ring.sched.ready = false;
3398 	}
3399 	udelay(50);
3400 }
3401 
3402 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3403 {
3404 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3405 	const __le32 *fw_data;
3406 	unsigned i;
3407 	u32 tmp;
3408 
3409 	if (!adev->gfx.mec_fw)
3410 		return -EINVAL;
3411 
3412 	gfx_v9_0_cp_compute_enable(adev, false);
3413 
3414 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3415 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3416 
3417 	fw_data = (const __le32 *)
3418 		(adev->gfx.mec_fw->data +
3419 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3420 	tmp = 0;
3421 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3422 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3423 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3424 
3425 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3426 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3427 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3428 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3429 
3430 	/* MEC1 */
3431 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3432 			 mec_hdr->jt_offset);
3433 	for (i = 0; i < mec_hdr->jt_size; i++)
3434 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3435 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3436 
3437 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3438 			adev->gfx.mec_fw_version);
3439 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3440 
3441 	return 0;
3442 }
3443 
3444 /* KIQ functions */
3445 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3446 {
3447 	uint32_t tmp;
3448 	struct amdgpu_device *adev = ring->adev;
3449 
3450 	/* tell RLC which is KIQ queue */
3451 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3452 	tmp &= 0xffffff00;
3453 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3454 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3455 	tmp |= 0x80;
3456 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3457 }
3458 
3459 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3460 {
3461 	struct amdgpu_device *adev = ring->adev;
3462 
3463 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3464 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3465 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3466 			mqd->cp_hqd_queue_priority =
3467 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3468 		}
3469 	}
3470 }
3471 
3472 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3473 {
3474 	struct amdgpu_device *adev = ring->adev;
3475 	struct v9_mqd *mqd = ring->mqd_ptr;
3476 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3477 	uint32_t tmp;
3478 
3479 	mqd->header = 0xC0310800;
3480 	mqd->compute_pipelinestat_enable = 0x00000001;
3481 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3482 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3483 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3484 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3485 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3486 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3487 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3488 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3489 	mqd->compute_misc_reserved = 0x00000003;
3490 
3491 	mqd->dynamic_cu_mask_addr_lo =
3492 		lower_32_bits(ring->mqd_gpu_addr
3493 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3494 	mqd->dynamic_cu_mask_addr_hi =
3495 		upper_32_bits(ring->mqd_gpu_addr
3496 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3497 
3498 	eop_base_addr = ring->eop_gpu_addr >> 8;
3499 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3500 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3501 
3502 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3503 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3504 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3505 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3506 
3507 	mqd->cp_hqd_eop_control = tmp;
3508 
3509 	/* enable doorbell? */
3510 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3511 
3512 	if (ring->use_doorbell) {
3513 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3514 				    DOORBELL_OFFSET, ring->doorbell_index);
3515 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3516 				    DOORBELL_EN, 1);
3517 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518 				    DOORBELL_SOURCE, 0);
3519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520 				    DOORBELL_HIT, 0);
3521 	} else {
3522 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523 					 DOORBELL_EN, 0);
3524 	}
3525 
3526 	mqd->cp_hqd_pq_doorbell_control = tmp;
3527 
3528 	/* disable the queue if it's active */
3529 	ring->wptr = 0;
3530 	mqd->cp_hqd_dequeue_request = 0;
3531 	mqd->cp_hqd_pq_rptr = 0;
3532 	mqd->cp_hqd_pq_wptr_lo = 0;
3533 	mqd->cp_hqd_pq_wptr_hi = 0;
3534 
3535 	/* set the pointer to the MQD */
3536 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3537 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3538 
3539 	/* set MQD vmid to 0 */
3540 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3541 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3542 	mqd->cp_mqd_control = tmp;
3543 
3544 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3545 	hqd_gpu_addr = ring->gpu_addr >> 8;
3546 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3547 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3548 
3549 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3550 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3551 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3552 			    (order_base_2(ring->ring_size / 4) - 1));
3553 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3554 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3555 #ifdef __BIG_ENDIAN
3556 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3557 #endif
3558 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3559 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3561 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3562 	mqd->cp_hqd_pq_control = tmp;
3563 
3564 	/* set the wb address whether it's enabled or not */
3565 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3566 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3567 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3568 		upper_32_bits(wb_gpu_addr) & 0xffff;
3569 
3570 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3571 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3572 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3573 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3574 
3575 	tmp = 0;
3576 	/* enable the doorbell if requested */
3577 	if (ring->use_doorbell) {
3578 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3579 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3580 				DOORBELL_OFFSET, ring->doorbell_index);
3581 
3582 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3583 					 DOORBELL_EN, 1);
3584 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3585 					 DOORBELL_SOURCE, 0);
3586 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3587 					 DOORBELL_HIT, 0);
3588 	}
3589 
3590 	mqd->cp_hqd_pq_doorbell_control = tmp;
3591 
3592 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3593 	ring->wptr = 0;
3594 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3595 
3596 	/* set the vmid for the queue */
3597 	mqd->cp_hqd_vmid = 0;
3598 
3599 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3600 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3601 	mqd->cp_hqd_persistent_state = tmp;
3602 
3603 	/* set MIN_IB_AVAIL_SIZE */
3604 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3605 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3606 	mqd->cp_hqd_ib_control = tmp;
3607 
3608 	/* set static priority for a queue/ring */
3609 	gfx_v9_0_mqd_set_priority(ring, mqd);
3610 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3611 
3612 	/* map_queues packet doesn't need activate the queue,
3613 	 * so only kiq need set this field.
3614 	 */
3615 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3616 		mqd->cp_hqd_active = 1;
3617 
3618 	return 0;
3619 }
3620 
3621 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3622 {
3623 	struct amdgpu_device *adev = ring->adev;
3624 	struct v9_mqd *mqd = ring->mqd_ptr;
3625 	int j;
3626 
3627 	/* disable wptr polling */
3628 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3629 
3630 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3631 	       mqd->cp_hqd_eop_base_addr_lo);
3632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3633 	       mqd->cp_hqd_eop_base_addr_hi);
3634 
3635 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3636 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3637 	       mqd->cp_hqd_eop_control);
3638 
3639 	/* enable doorbell? */
3640 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3641 	       mqd->cp_hqd_pq_doorbell_control);
3642 
3643 	/* disable the queue if it's active */
3644 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3645 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3646 		for (j = 0; j < adev->usec_timeout; j++) {
3647 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3648 				break;
3649 			udelay(1);
3650 		}
3651 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3652 		       mqd->cp_hqd_dequeue_request);
3653 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3654 		       mqd->cp_hqd_pq_rptr);
3655 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3656 		       mqd->cp_hqd_pq_wptr_lo);
3657 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3658 		       mqd->cp_hqd_pq_wptr_hi);
3659 	}
3660 
3661 	/* set the pointer to the MQD */
3662 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3663 	       mqd->cp_mqd_base_addr_lo);
3664 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3665 	       mqd->cp_mqd_base_addr_hi);
3666 
3667 	/* set MQD vmid to 0 */
3668 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3669 	       mqd->cp_mqd_control);
3670 
3671 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3672 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3673 	       mqd->cp_hqd_pq_base_lo);
3674 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3675 	       mqd->cp_hqd_pq_base_hi);
3676 
3677 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3678 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3679 	       mqd->cp_hqd_pq_control);
3680 
3681 	/* set the wb address whether it's enabled or not */
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3683 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3685 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3686 
3687 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3688 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3689 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3690 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3691 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3692 
3693 	/* enable the doorbell if requested */
3694 	if (ring->use_doorbell) {
3695 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3696 					(adev->doorbell_index.kiq * 2) << 2);
3697 		/* If GC has entered CGPG, ringing doorbell > first page
3698 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3699 		 * workaround this issue. And this change has to align with firmware
3700 		 * update.
3701 		 */
3702 		if (check_if_enlarge_doorbell_range(adev))
3703 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3704 					(adev->doorbell.size - 4));
3705 		else
3706 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3707 					(adev->doorbell_index.userqueue_end * 2) << 2);
3708 	}
3709 
3710 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3711 	       mqd->cp_hqd_pq_doorbell_control);
3712 
3713 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3714 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3715 	       mqd->cp_hqd_pq_wptr_lo);
3716 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3717 	       mqd->cp_hqd_pq_wptr_hi);
3718 
3719 	/* set the vmid for the queue */
3720 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3721 
3722 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3723 	       mqd->cp_hqd_persistent_state);
3724 
3725 	/* activate the queue */
3726 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3727 	       mqd->cp_hqd_active);
3728 
3729 	if (ring->use_doorbell)
3730 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3731 
3732 	return 0;
3733 }
3734 
3735 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3736 {
3737 	struct amdgpu_device *adev = ring->adev;
3738 	int j;
3739 
3740 	/* disable the queue if it's active */
3741 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3742 
3743 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3744 
3745 		for (j = 0; j < adev->usec_timeout; j++) {
3746 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3747 				break;
3748 			udelay(1);
3749 		}
3750 
3751 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3752 			DRM_DEBUG("KIQ dequeue request failed.\n");
3753 
3754 			/* Manual disable if dequeue request times out */
3755 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3756 		}
3757 
3758 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3759 		      0);
3760 	}
3761 
3762 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3763 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3764 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3765 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3766 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3767 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3768 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3769 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3770 
3771 	return 0;
3772 }
3773 
3774 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3775 {
3776 	struct amdgpu_device *adev = ring->adev;
3777 	struct v9_mqd *mqd = ring->mqd_ptr;
3778 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3779 	struct v9_mqd *tmp_mqd;
3780 
3781 	gfx_v9_0_kiq_setting(ring);
3782 
3783 	/* GPU could be in bad state during probe, driver trigger the reset
3784 	 * after load the SMU, in this case , the mqd is not be initialized.
3785 	 * driver need to re-init the mqd.
3786 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3787 	 */
3788 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3789 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3790 		/* for GPU_RESET case , reset MQD to a clean status */
3791 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3792 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3793 
3794 		/* reset ring buffer */
3795 		ring->wptr = 0;
3796 		amdgpu_ring_clear_ring(ring);
3797 
3798 		mutex_lock(&adev->srbm_mutex);
3799 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3800 		gfx_v9_0_kiq_init_register(ring);
3801 		soc15_grbm_select(adev, 0, 0, 0, 0);
3802 		mutex_unlock(&adev->srbm_mutex);
3803 	} else {
3804 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3805 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3806 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3807 		mutex_lock(&adev->srbm_mutex);
3808 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3809 		gfx_v9_0_mqd_init(ring);
3810 		gfx_v9_0_kiq_init_register(ring);
3811 		soc15_grbm_select(adev, 0, 0, 0, 0);
3812 		mutex_unlock(&adev->srbm_mutex);
3813 
3814 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3815 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3816 	}
3817 
3818 	return 0;
3819 }
3820 
3821 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3822 {
3823 	struct amdgpu_device *adev = ring->adev;
3824 	struct v9_mqd *mqd = ring->mqd_ptr;
3825 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3826 	struct v9_mqd *tmp_mqd;
3827 
3828 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3829 	 * is not be initialized before
3830 	 */
3831 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3832 
3833 	if (!tmp_mqd->cp_hqd_pq_control ||
3834 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3835 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3836 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3837 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3838 		mutex_lock(&adev->srbm_mutex);
3839 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3840 		gfx_v9_0_mqd_init(ring);
3841 		soc15_grbm_select(adev, 0, 0, 0, 0);
3842 		mutex_unlock(&adev->srbm_mutex);
3843 
3844 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3845 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3846 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3847 		/* reset MQD to a clean status */
3848 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3849 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3850 
3851 		/* reset ring buffer */
3852 		ring->wptr = 0;
3853 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3854 		amdgpu_ring_clear_ring(ring);
3855 	} else {
3856 		amdgpu_ring_clear_ring(ring);
3857 	}
3858 
3859 	return 0;
3860 }
3861 
3862 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3863 {
3864 	struct amdgpu_ring *ring;
3865 	int r;
3866 
3867 	ring = &adev->gfx.kiq.ring;
3868 
3869 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3870 	if (unlikely(r != 0))
3871 		return r;
3872 
3873 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3874 	if (unlikely(r != 0))
3875 		return r;
3876 
3877 	gfx_v9_0_kiq_init_queue(ring);
3878 	amdgpu_bo_kunmap(ring->mqd_obj);
3879 	ring->mqd_ptr = NULL;
3880 	amdgpu_bo_unreserve(ring->mqd_obj);
3881 	ring->sched.ready = true;
3882 	return 0;
3883 }
3884 
3885 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3886 {
3887 	struct amdgpu_ring *ring = NULL;
3888 	int r = 0, i;
3889 
3890 	gfx_v9_0_cp_compute_enable(adev, true);
3891 
3892 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3893 		ring = &adev->gfx.compute_ring[i];
3894 
3895 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3896 		if (unlikely(r != 0))
3897 			goto done;
3898 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3899 		if (!r) {
3900 			r = gfx_v9_0_kcq_init_queue(ring);
3901 			amdgpu_bo_kunmap(ring->mqd_obj);
3902 			ring->mqd_ptr = NULL;
3903 		}
3904 		amdgpu_bo_unreserve(ring->mqd_obj);
3905 		if (r)
3906 			goto done;
3907 	}
3908 
3909 	r = amdgpu_gfx_enable_kcq(adev);
3910 done:
3911 	return r;
3912 }
3913 
3914 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3915 {
3916 	int r, i;
3917 	struct amdgpu_ring *ring;
3918 
3919 	if (!(adev->flags & AMD_IS_APU))
3920 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3921 
3922 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3923 		if (adev->gfx.num_gfx_rings) {
3924 			/* legacy firmware loading */
3925 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3926 			if (r)
3927 				return r;
3928 		}
3929 
3930 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3931 		if (r)
3932 			return r;
3933 	}
3934 
3935 	r = gfx_v9_0_kiq_resume(adev);
3936 	if (r)
3937 		return r;
3938 
3939 	if (adev->gfx.num_gfx_rings) {
3940 		r = gfx_v9_0_cp_gfx_resume(adev);
3941 		if (r)
3942 			return r;
3943 	}
3944 
3945 	r = gfx_v9_0_kcq_resume(adev);
3946 	if (r)
3947 		return r;
3948 
3949 	if (adev->gfx.num_gfx_rings) {
3950 		ring = &adev->gfx.gfx_ring[0];
3951 		r = amdgpu_ring_test_helper(ring);
3952 		if (r)
3953 			return r;
3954 	}
3955 
3956 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3957 		ring = &adev->gfx.compute_ring[i];
3958 		amdgpu_ring_test_helper(ring);
3959 	}
3960 
3961 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3962 
3963 	return 0;
3964 }
3965 
3966 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3967 {
3968 	u32 tmp;
3969 
3970 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3971 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3972 		return;
3973 
3974 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3975 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3976 				adev->df.hash_status.hash_64k);
3977 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3978 				adev->df.hash_status.hash_2m);
3979 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3980 				adev->df.hash_status.hash_1g);
3981 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3982 }
3983 
3984 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3985 {
3986 	if (adev->gfx.num_gfx_rings)
3987 		gfx_v9_0_cp_gfx_enable(adev, enable);
3988 	gfx_v9_0_cp_compute_enable(adev, enable);
3989 }
3990 
3991 static int gfx_v9_0_hw_init(void *handle)
3992 {
3993 	int r;
3994 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3995 
3996 	if (!amdgpu_sriov_vf(adev))
3997 		gfx_v9_0_init_golden_registers(adev);
3998 
3999 	gfx_v9_0_constants_init(adev);
4000 
4001 	gfx_v9_0_init_tcp_config(adev);
4002 
4003 	r = adev->gfx.rlc.funcs->resume(adev);
4004 	if (r)
4005 		return r;
4006 
4007 	r = gfx_v9_0_cp_resume(adev);
4008 	if (r)
4009 		return r;
4010 
4011 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4012 		gfx_v9_4_2_set_power_brake_sequence(adev);
4013 
4014 	return r;
4015 }
4016 
4017 static int gfx_v9_0_hw_fini(void *handle)
4018 {
4019 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4020 
4021 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4022 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4023 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4024 
4025 	/* DF freeze and kcq disable will fail */
4026 	if (!amdgpu_ras_intr_triggered())
4027 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4028 		amdgpu_gfx_disable_kcq(adev);
4029 
4030 	if (amdgpu_sriov_vf(adev)) {
4031 		gfx_v9_0_cp_gfx_enable(adev, false);
4032 		/* must disable polling for SRIOV when hw finished, otherwise
4033 		 * CPC engine may still keep fetching WB address which is already
4034 		 * invalid after sw finished and trigger DMAR reading error in
4035 		 * hypervisor side.
4036 		 */
4037 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4038 		return 0;
4039 	}
4040 
4041 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4042 	 * otherwise KIQ is hanging when binding back
4043 	 */
4044 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4045 		mutex_lock(&adev->srbm_mutex);
4046 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4047 				adev->gfx.kiq.ring.pipe,
4048 				adev->gfx.kiq.ring.queue, 0);
4049 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4050 		soc15_grbm_select(adev, 0, 0, 0, 0);
4051 		mutex_unlock(&adev->srbm_mutex);
4052 	}
4053 
4054 	gfx_v9_0_cp_enable(adev, false);
4055 
4056 	/* Skip suspend with A+A reset */
4057 	if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4058 		dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4059 		return 0;
4060 	}
4061 
4062 	adev->gfx.rlc.funcs->stop(adev);
4063 	return 0;
4064 }
4065 
4066 static int gfx_v9_0_suspend(void *handle)
4067 {
4068 	return gfx_v9_0_hw_fini(handle);
4069 }
4070 
4071 static int gfx_v9_0_resume(void *handle)
4072 {
4073 	return gfx_v9_0_hw_init(handle);
4074 }
4075 
4076 static bool gfx_v9_0_is_idle(void *handle)
4077 {
4078 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4079 
4080 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4081 				GRBM_STATUS, GUI_ACTIVE))
4082 		return false;
4083 	else
4084 		return true;
4085 }
4086 
4087 static int gfx_v9_0_wait_for_idle(void *handle)
4088 {
4089 	unsigned i;
4090 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4091 
4092 	for (i = 0; i < adev->usec_timeout; i++) {
4093 		if (gfx_v9_0_is_idle(handle))
4094 			return 0;
4095 		udelay(1);
4096 	}
4097 	return -ETIMEDOUT;
4098 }
4099 
4100 static int gfx_v9_0_soft_reset(void *handle)
4101 {
4102 	u32 grbm_soft_reset = 0;
4103 	u32 tmp;
4104 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4105 
4106 	/* GRBM_STATUS */
4107 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4108 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4109 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4110 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4111 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4112 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4113 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4114 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4115 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4116 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4117 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4118 	}
4119 
4120 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4121 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4122 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4123 	}
4124 
4125 	/* GRBM_STATUS2 */
4126 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4127 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4128 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4129 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4130 
4131 
4132 	if (grbm_soft_reset) {
4133 		/* stop the rlc */
4134 		adev->gfx.rlc.funcs->stop(adev);
4135 
4136 		if (adev->gfx.num_gfx_rings)
4137 			/* Disable GFX parsing/prefetching */
4138 			gfx_v9_0_cp_gfx_enable(adev, false);
4139 
4140 		/* Disable MEC parsing/prefetching */
4141 		gfx_v9_0_cp_compute_enable(adev, false);
4142 
4143 		if (grbm_soft_reset) {
4144 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4145 			tmp |= grbm_soft_reset;
4146 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4147 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4148 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4149 
4150 			udelay(50);
4151 
4152 			tmp &= ~grbm_soft_reset;
4153 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4154 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4155 		}
4156 
4157 		/* Wait a little for things to settle down */
4158 		udelay(50);
4159 	}
4160 	return 0;
4161 }
4162 
4163 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4164 {
4165 	signed long r, cnt = 0;
4166 	unsigned long flags;
4167 	uint32_t seq, reg_val_offs = 0;
4168 	uint64_t value = 0;
4169 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4170 	struct amdgpu_ring *ring = &kiq->ring;
4171 
4172 	BUG_ON(!ring->funcs->emit_rreg);
4173 
4174 	spin_lock_irqsave(&kiq->ring_lock, flags);
4175 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4176 		pr_err("critical bug! too many kiq readers\n");
4177 		goto failed_unlock;
4178 	}
4179 	amdgpu_ring_alloc(ring, 32);
4180 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4181 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4182 				(5 << 8) |	/* dst: memory */
4183 				(1 << 16) |	/* count sel */
4184 				(1 << 20));	/* write confirm */
4185 	amdgpu_ring_write(ring, 0);
4186 	amdgpu_ring_write(ring, 0);
4187 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4188 				reg_val_offs * 4));
4189 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4190 				reg_val_offs * 4));
4191 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4192 	if (r)
4193 		goto failed_undo;
4194 
4195 	amdgpu_ring_commit(ring);
4196 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4197 
4198 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4199 
4200 	/* don't wait anymore for gpu reset case because this way may
4201 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4202 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4203 	 * never return if we keep waiting in virt_kiq_rreg, which cause
4204 	 * gpu_recover() hang there.
4205 	 *
4206 	 * also don't wait anymore for IRQ context
4207 	 * */
4208 	if (r < 1 && (amdgpu_in_reset(adev)))
4209 		goto failed_kiq_read;
4210 
4211 	might_sleep();
4212 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4213 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4214 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4215 	}
4216 
4217 	if (cnt > MAX_KIQ_REG_TRY)
4218 		goto failed_kiq_read;
4219 
4220 	mb();
4221 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4222 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4223 	amdgpu_device_wb_free(adev, reg_val_offs);
4224 	return value;
4225 
4226 failed_undo:
4227 	amdgpu_ring_undo(ring);
4228 failed_unlock:
4229 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4230 failed_kiq_read:
4231 	if (reg_val_offs)
4232 		amdgpu_device_wb_free(adev, reg_val_offs);
4233 	pr_err("failed to read gpu clock\n");
4234 	return ~0;
4235 }
4236 
4237 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4238 {
4239 	uint64_t clock;
4240 
4241 	amdgpu_gfx_off_ctrl(adev, false);
4242 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4243 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4244 		clock = gfx_v9_0_kiq_read_clock(adev);
4245 	} else {
4246 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4247 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4248 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4249 	}
4250 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4251 	amdgpu_gfx_off_ctrl(adev, true);
4252 	return clock;
4253 }
4254 
4255 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4256 					  uint32_t vmid,
4257 					  uint32_t gds_base, uint32_t gds_size,
4258 					  uint32_t gws_base, uint32_t gws_size,
4259 					  uint32_t oa_base, uint32_t oa_size)
4260 {
4261 	struct amdgpu_device *adev = ring->adev;
4262 
4263 	/* GDS Base */
4264 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4265 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4266 				   gds_base);
4267 
4268 	/* GDS Size */
4269 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4270 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4271 				   gds_size);
4272 
4273 	/* GWS */
4274 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4275 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4276 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4277 
4278 	/* OA */
4279 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4280 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4281 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4282 }
4283 
4284 static const u32 vgpr_init_compute_shader[] =
4285 {
4286 	0xb07c0000, 0xbe8000ff,
4287 	0x000000f8, 0xbf110800,
4288 	0x7e000280, 0x7e020280,
4289 	0x7e040280, 0x7e060280,
4290 	0x7e080280, 0x7e0a0280,
4291 	0x7e0c0280, 0x7e0e0280,
4292 	0x80808800, 0xbe803200,
4293 	0xbf84fff5, 0xbf9c0000,
4294 	0xd28c0001, 0x0001007f,
4295 	0xd28d0001, 0x0002027e,
4296 	0x10020288, 0xb8810904,
4297 	0xb7814000, 0xd1196a01,
4298 	0x00000301, 0xbe800087,
4299 	0xbefc00c1, 0xd89c4000,
4300 	0x00020201, 0xd89cc080,
4301 	0x00040401, 0x320202ff,
4302 	0x00000800, 0x80808100,
4303 	0xbf84fff8, 0x7e020280,
4304 	0xbf810000, 0x00000000,
4305 };
4306 
4307 static const u32 sgpr_init_compute_shader[] =
4308 {
4309 	0xb07c0000, 0xbe8000ff,
4310 	0x0000005f, 0xbee50080,
4311 	0xbe812c65, 0xbe822c65,
4312 	0xbe832c65, 0xbe842c65,
4313 	0xbe852c65, 0xb77c0005,
4314 	0x80808500, 0xbf84fff8,
4315 	0xbe800080, 0xbf810000,
4316 };
4317 
4318 static const u32 vgpr_init_compute_shader_arcturus[] = {
4319 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4320 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4321 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4322 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4323 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4324 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4325 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4326 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4327 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4328 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4329 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4330 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4331 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4332 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4333 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4334 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4335 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4336 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4337 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4338 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4339 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4340 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4341 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4342 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4343 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4344 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4345 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4346 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4347 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4348 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4349 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4350 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4351 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4352 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4353 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4354 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4355 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4356 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4357 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4358 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4359 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4360 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4361 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4362 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4363 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4364 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4365 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4366 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4367 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4368 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4369 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4370 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4371 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4372 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4373 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4374 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4375 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4376 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4377 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4378 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4379 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4380 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4381 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4382 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4383 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4384 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4385 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4386 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4387 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4388 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4389 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4390 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4391 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4392 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4393 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4394 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4395 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4396 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4397 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4398 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4399 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4400 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4401 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4402 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4403 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4404 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4405 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4406 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4407 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4408 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4409 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4410 	0xbf84fff8, 0xbf810000,
4411 };
4412 
4413 /* When below register arrays changed, please update gpr_reg_size,
4414   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4415   to cover all gfx9 ASICs */
4416 static const struct soc15_reg_entry vgpr_init_regs[] = {
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4431 };
4432 
4433 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4448 };
4449 
4450 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4465 };
4466 
4467 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4482 };
4483 
4484 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4485    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4486    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4487    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4488    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4489    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4490    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4499    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4500    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4501    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4502    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4503    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4504    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4508    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4510    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4511    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4512    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4513    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4514    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4515    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4516    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4517    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4518 };
4519 
4520 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4521 {
4522 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4523 	int i, r;
4524 
4525 	/* only support when RAS is enabled */
4526 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4527 		return 0;
4528 
4529 	r = amdgpu_ring_alloc(ring, 7);
4530 	if (r) {
4531 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4532 			ring->name, r);
4533 		return r;
4534 	}
4535 
4536 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4537 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4538 
4539 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4540 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4541 				PACKET3_DMA_DATA_DST_SEL(1) |
4542 				PACKET3_DMA_DATA_SRC_SEL(2) |
4543 				PACKET3_DMA_DATA_ENGINE(0)));
4544 	amdgpu_ring_write(ring, 0);
4545 	amdgpu_ring_write(ring, 0);
4546 	amdgpu_ring_write(ring, 0);
4547 	amdgpu_ring_write(ring, 0);
4548 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4549 				adev->gds.gds_size);
4550 
4551 	amdgpu_ring_commit(ring);
4552 
4553 	for (i = 0; i < adev->usec_timeout; i++) {
4554 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4555 			break;
4556 		udelay(1);
4557 	}
4558 
4559 	if (i >= adev->usec_timeout)
4560 		r = -ETIMEDOUT;
4561 
4562 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4563 
4564 	return r;
4565 }
4566 
4567 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4568 {
4569 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4570 	struct amdgpu_ib ib;
4571 	struct dma_fence *f = NULL;
4572 	int r, i;
4573 	unsigned total_size, vgpr_offset, sgpr_offset;
4574 	u64 gpu_addr;
4575 
4576 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4577 						adev->gfx.config.max_cu_per_sh *
4578 						adev->gfx.config.max_sh_per_se;
4579 	int sgpr_work_group_size = 5;
4580 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4581 	int vgpr_init_shader_size;
4582 	const u32 *vgpr_init_shader_ptr;
4583 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4584 
4585 	/* only support when RAS is enabled */
4586 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4587 		return 0;
4588 
4589 	/* bail if the compute ring is not ready */
4590 	if (!ring->sched.ready)
4591 		return 0;
4592 
4593 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4594 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4595 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4596 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4597 	} else {
4598 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4599 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4600 		vgpr_init_regs_ptr = vgpr_init_regs;
4601 	}
4602 
4603 	total_size =
4604 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4605 	total_size +=
4606 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4607 	total_size +=
4608 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4609 	total_size = ALIGN(total_size, 256);
4610 	vgpr_offset = total_size;
4611 	total_size += ALIGN(vgpr_init_shader_size, 256);
4612 	sgpr_offset = total_size;
4613 	total_size += sizeof(sgpr_init_compute_shader);
4614 
4615 	/* allocate an indirect buffer to put the commands in */
4616 	memset(&ib, 0, sizeof(ib));
4617 	r = amdgpu_ib_get(adev, NULL, total_size,
4618 					AMDGPU_IB_POOL_DIRECT, &ib);
4619 	if (r) {
4620 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4621 		return r;
4622 	}
4623 
4624 	/* load the compute shaders */
4625 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4626 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4627 
4628 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4629 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4630 
4631 	/* init the ib length to 0 */
4632 	ib.length_dw = 0;
4633 
4634 	/* VGPR */
4635 	/* write the register state for the compute dispatch */
4636 	for (i = 0; i < gpr_reg_size; i++) {
4637 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4638 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4639 								- PACKET3_SET_SH_REG_START;
4640 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4641 	}
4642 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4643 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4644 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4645 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4646 							- PACKET3_SET_SH_REG_START;
4647 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4648 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4649 
4650 	/* write dispatch packet */
4651 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4652 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4653 	ib.ptr[ib.length_dw++] = 1; /* y */
4654 	ib.ptr[ib.length_dw++] = 1; /* z */
4655 	ib.ptr[ib.length_dw++] =
4656 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4657 
4658 	/* write CS partial flush packet */
4659 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4660 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4661 
4662 	/* SGPR1 */
4663 	/* write the register state for the compute dispatch */
4664 	for (i = 0; i < gpr_reg_size; i++) {
4665 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4666 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4667 								- PACKET3_SET_SH_REG_START;
4668 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4669 	}
4670 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4671 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4672 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4673 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4674 							- PACKET3_SET_SH_REG_START;
4675 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4676 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4677 
4678 	/* write dispatch packet */
4679 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4680 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4681 	ib.ptr[ib.length_dw++] = 1; /* y */
4682 	ib.ptr[ib.length_dw++] = 1; /* z */
4683 	ib.ptr[ib.length_dw++] =
4684 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4685 
4686 	/* write CS partial flush packet */
4687 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4688 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4689 
4690 	/* SGPR2 */
4691 	/* write the register state for the compute dispatch */
4692 	for (i = 0; i < gpr_reg_size; i++) {
4693 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4694 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4695 								- PACKET3_SET_SH_REG_START;
4696 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4697 	}
4698 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4699 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4700 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4701 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4702 							- PACKET3_SET_SH_REG_START;
4703 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4704 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4705 
4706 	/* write dispatch packet */
4707 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4708 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4709 	ib.ptr[ib.length_dw++] = 1; /* y */
4710 	ib.ptr[ib.length_dw++] = 1; /* z */
4711 	ib.ptr[ib.length_dw++] =
4712 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4713 
4714 	/* write CS partial flush packet */
4715 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4716 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4717 
4718 	/* shedule the ib on the ring */
4719 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4720 	if (r) {
4721 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4722 		goto fail;
4723 	}
4724 
4725 	/* wait for the GPU to finish processing the IB */
4726 	r = dma_fence_wait(f, false);
4727 	if (r) {
4728 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4729 		goto fail;
4730 	}
4731 
4732 fail:
4733 	amdgpu_ib_free(adev, &ib, NULL);
4734 	dma_fence_put(f);
4735 
4736 	return r;
4737 }
4738 
4739 static int gfx_v9_0_early_init(void *handle)
4740 {
4741 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4742 
4743 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4744 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4745 		adev->gfx.num_gfx_rings = 0;
4746 	else
4747 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4748 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4749 					  AMDGPU_MAX_COMPUTE_RINGS);
4750 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4751 	gfx_v9_0_set_ring_funcs(adev);
4752 	gfx_v9_0_set_irq_funcs(adev);
4753 	gfx_v9_0_set_gds_init(adev);
4754 	gfx_v9_0_set_rlc_funcs(adev);
4755 
4756 	return 0;
4757 }
4758 
4759 static int gfx_v9_0_ecc_late_init(void *handle)
4760 {
4761 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4762 	int r;
4763 
4764 	/*
4765 	 * Temp workaround to fix the issue that CP firmware fails to
4766 	 * update read pointer when CPDMA is writing clearing operation
4767 	 * to GDS in suspend/resume sequence on several cards. So just
4768 	 * limit this operation in cold boot sequence.
4769 	 */
4770 	if ((!adev->in_suspend) &&
4771 	    (adev->gds.gds_size)) {
4772 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4773 		if (r)
4774 			return r;
4775 	}
4776 
4777 	/* requires IBs so do in late init after IB pool is initialized */
4778 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4779 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4780 	else
4781 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4782 
4783 	if (r)
4784 		return r;
4785 
4786 	if (adev->gfx.ras_funcs &&
4787 	    adev->gfx.ras_funcs->ras_late_init) {
4788 		r = adev->gfx.ras_funcs->ras_late_init(adev);
4789 		if (r)
4790 			return r;
4791 	}
4792 
4793 	if (adev->gfx.ras_funcs &&
4794 	    adev->gfx.ras_funcs->enable_watchdog_timer)
4795 		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4796 
4797 	return 0;
4798 }
4799 
4800 static int gfx_v9_0_late_init(void *handle)
4801 {
4802 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4803 	int r;
4804 
4805 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4806 	if (r)
4807 		return r;
4808 
4809 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4810 	if (r)
4811 		return r;
4812 
4813 	r = gfx_v9_0_ecc_late_init(handle);
4814 	if (r)
4815 		return r;
4816 
4817 	return 0;
4818 }
4819 
4820 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4821 {
4822 	uint32_t rlc_setting;
4823 
4824 	/* if RLC is not enabled, do nothing */
4825 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4826 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4827 		return false;
4828 
4829 	return true;
4830 }
4831 
4832 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4833 {
4834 	uint32_t data;
4835 	unsigned i;
4836 
4837 	data = RLC_SAFE_MODE__CMD_MASK;
4838 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4839 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4840 
4841 	/* wait for RLC_SAFE_MODE */
4842 	for (i = 0; i < adev->usec_timeout; i++) {
4843 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4844 			break;
4845 		udelay(1);
4846 	}
4847 }
4848 
4849 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4850 {
4851 	uint32_t data;
4852 
4853 	data = RLC_SAFE_MODE__CMD_MASK;
4854 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4855 }
4856 
4857 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4858 						bool enable)
4859 {
4860 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4861 
4862 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4863 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4864 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4865 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4866 	} else {
4867 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4868 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4869 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4870 	}
4871 
4872 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4873 }
4874 
4875 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4876 						bool enable)
4877 {
4878 	/* TODO: double check if we need to perform under safe mode */
4879 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4880 
4881 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4882 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4883 	else
4884 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4885 
4886 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4887 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4888 	else
4889 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4890 
4891 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4892 }
4893 
4894 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4895 						      bool enable)
4896 {
4897 	uint32_t data, def;
4898 
4899 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4900 
4901 	/* It is disabled by HW by default */
4902 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4903 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4904 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4905 
4906 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4907 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4908 
4909 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4910 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4911 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4912 
4913 		/* only for Vega10 & Raven1 */
4914 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4915 
4916 		if (def != data)
4917 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4918 
4919 		/* MGLS is a global flag to control all MGLS in GFX */
4920 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4921 			/* 2 - RLC memory Light sleep */
4922 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4923 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4924 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4925 				if (def != data)
4926 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4927 			}
4928 			/* 3 - CP memory Light sleep */
4929 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4930 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4931 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4932 				if (def != data)
4933 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4934 			}
4935 		}
4936 	} else {
4937 		/* 1 - MGCG_OVERRIDE */
4938 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4939 
4940 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4941 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4942 
4943 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4944 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4945 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4946 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4947 
4948 		if (def != data)
4949 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4950 
4951 		/* 2 - disable MGLS in RLC */
4952 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4953 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4954 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4955 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4956 		}
4957 
4958 		/* 3 - disable MGLS in CP */
4959 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4960 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4961 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4962 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4963 		}
4964 	}
4965 
4966 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4967 }
4968 
4969 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4970 					   bool enable)
4971 {
4972 	uint32_t data, def;
4973 
4974 	if (!adev->gfx.num_gfx_rings)
4975 		return;
4976 
4977 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4978 
4979 	/* Enable 3D CGCG/CGLS */
4980 	if (enable) {
4981 		/* write cmd to clear cgcg/cgls ov */
4982 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4983 		/* unset CGCG override */
4984 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4985 		/* update CGCG and CGLS override bits */
4986 		if (def != data)
4987 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4988 
4989 		/* enable 3Dcgcg FSM(0x0000363f) */
4990 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4991 
4992 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4993 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4994 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4995 		else
4996 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4997 
4998 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4999 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5000 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5001 		if (def != data)
5002 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5003 
5004 		/* set IDLE_POLL_COUNT(0x00900100) */
5005 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5006 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5007 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5008 		if (def != data)
5009 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5010 	} else {
5011 		/* Disable CGCG/CGLS */
5012 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5013 		/* disable cgcg, cgls should be disabled */
5014 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5015 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5016 		/* disable cgcg and cgls in FSM */
5017 		if (def != data)
5018 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5019 	}
5020 
5021 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5022 }
5023 
5024 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5025 						      bool enable)
5026 {
5027 	uint32_t def, data;
5028 
5029 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5030 
5031 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5032 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5033 		/* unset CGCG override */
5034 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5035 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5036 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5037 		else
5038 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5039 		/* update CGCG and CGLS override bits */
5040 		if (def != data)
5041 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5042 
5043 		/* enable cgcg FSM(0x0000363F) */
5044 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5045 
5046 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5047 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5048 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5049 		else
5050 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5051 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5052 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5053 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5054 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5055 		if (def != data)
5056 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5057 
5058 		/* set IDLE_POLL_COUNT(0x00900100) */
5059 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5060 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5061 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5062 		if (def != data)
5063 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5064 	} else {
5065 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5066 		/* reset CGCG/CGLS bits */
5067 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5068 		/* disable cgcg and cgls in FSM */
5069 		if (def != data)
5070 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5071 	}
5072 
5073 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5074 }
5075 
5076 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5077 					    bool enable)
5078 {
5079 	if (enable) {
5080 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5081 		 * ===  MGCG + MGLS ===
5082 		 */
5083 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5084 		/* ===  CGCG /CGLS for GFX 3D Only === */
5085 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5086 		/* ===  CGCG + CGLS === */
5087 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5088 	} else {
5089 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5090 		 * ===  CGCG + CGLS ===
5091 		 */
5092 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5093 		/* ===  CGCG /CGLS for GFX 3D Only === */
5094 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5095 		/* ===  MGCG + MGLS === */
5096 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5097 	}
5098 	return 0;
5099 }
5100 
5101 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5102 {
5103 	u32 reg, data;
5104 
5105 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5106 	if (amdgpu_sriov_is_pp_one_vf(adev))
5107 		data = RREG32_NO_KIQ(reg);
5108 	else
5109 		data = RREG32(reg);
5110 
5111 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5112 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5113 
5114 	if (amdgpu_sriov_is_pp_one_vf(adev))
5115 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5116 	else
5117 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5118 }
5119 
5120 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5121 					uint32_t offset,
5122 					struct soc15_reg_rlcg *entries, int arr_size)
5123 {
5124 	int i;
5125 	uint32_t reg;
5126 
5127 	if (!entries)
5128 		return false;
5129 
5130 	for (i = 0; i < arr_size; i++) {
5131 		const struct soc15_reg_rlcg *entry;
5132 
5133 		entry = &entries[i];
5134 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5135 		if (offset == reg)
5136 			return true;
5137 	}
5138 
5139 	return false;
5140 }
5141 
5142 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5143 {
5144 	return gfx_v9_0_check_rlcg_range(adev, offset,
5145 					(void *)rlcg_access_gc_9_0,
5146 					ARRAY_SIZE(rlcg_access_gc_9_0));
5147 }
5148 
5149 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5150 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5151 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5152 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5153 	.init = gfx_v9_0_rlc_init,
5154 	.get_csb_size = gfx_v9_0_get_csb_size,
5155 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5156 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5157 	.resume = gfx_v9_0_rlc_resume,
5158 	.stop = gfx_v9_0_rlc_stop,
5159 	.reset = gfx_v9_0_rlc_reset,
5160 	.start = gfx_v9_0_rlc_start,
5161 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5162 	.sriov_wreg = gfx_v9_0_sriov_wreg,
5163 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5164 };
5165 
5166 static int gfx_v9_0_set_powergating_state(void *handle,
5167 					  enum amd_powergating_state state)
5168 {
5169 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5170 	bool enable = (state == AMD_PG_STATE_GATE);
5171 
5172 	switch (adev->ip_versions[GC_HWIP][0]) {
5173 	case IP_VERSION(9, 2, 2):
5174 	case IP_VERSION(9, 1, 0):
5175 	case IP_VERSION(9, 3, 0):
5176 		if (!enable)
5177 			amdgpu_gfx_off_ctrl(adev, false);
5178 
5179 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5180 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5181 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5182 		} else {
5183 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5184 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5185 		}
5186 
5187 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5188 			gfx_v9_0_enable_cp_power_gating(adev, true);
5189 		else
5190 			gfx_v9_0_enable_cp_power_gating(adev, false);
5191 
5192 		/* update gfx cgpg state */
5193 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5194 
5195 		/* update mgcg state */
5196 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5197 
5198 		if (enable)
5199 			amdgpu_gfx_off_ctrl(adev, true);
5200 		break;
5201 	case IP_VERSION(9, 2, 1):
5202 		amdgpu_gfx_off_ctrl(adev, enable);
5203 		break;
5204 	default:
5205 		break;
5206 	}
5207 
5208 	return 0;
5209 }
5210 
5211 static int gfx_v9_0_set_clockgating_state(void *handle,
5212 					  enum amd_clockgating_state state)
5213 {
5214 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5215 
5216 	if (amdgpu_sriov_vf(adev))
5217 		return 0;
5218 
5219 	switch (adev->ip_versions[GC_HWIP][0]) {
5220 	case IP_VERSION(9, 0, 1):
5221 	case IP_VERSION(9, 2, 1):
5222 	case IP_VERSION(9, 4, 0):
5223 	case IP_VERSION(9, 2, 2):
5224 	case IP_VERSION(9, 1, 0):
5225 	case IP_VERSION(9, 4, 1):
5226 	case IP_VERSION(9, 3, 0):
5227 	case IP_VERSION(9, 4, 2):
5228 		gfx_v9_0_update_gfx_clock_gating(adev,
5229 						 state == AMD_CG_STATE_GATE);
5230 		break;
5231 	default:
5232 		break;
5233 	}
5234 	return 0;
5235 }
5236 
5237 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5238 {
5239 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5240 	int data;
5241 
5242 	if (amdgpu_sriov_vf(adev))
5243 		*flags = 0;
5244 
5245 	/* AMD_CG_SUPPORT_GFX_MGCG */
5246 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5247 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5248 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5249 
5250 	/* AMD_CG_SUPPORT_GFX_CGCG */
5251 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5252 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5253 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5254 
5255 	/* AMD_CG_SUPPORT_GFX_CGLS */
5256 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5257 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5258 
5259 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5260 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5261 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5262 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5263 
5264 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5265 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5266 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5267 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5268 
5269 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5270 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5271 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5272 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5273 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5274 
5275 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5276 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5277 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5278 	}
5279 }
5280 
5281 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5282 {
5283 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
5284 }
5285 
5286 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5287 {
5288 	struct amdgpu_device *adev = ring->adev;
5289 	u64 wptr;
5290 
5291 	/* XXX check if swapping is necessary on BE */
5292 	if (ring->use_doorbell) {
5293 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5294 	} else {
5295 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5296 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5297 	}
5298 
5299 	return wptr;
5300 }
5301 
5302 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5303 {
5304 	struct amdgpu_device *adev = ring->adev;
5305 
5306 	if (ring->use_doorbell) {
5307 		/* XXX check if swapping is necessary on BE */
5308 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5309 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5310 	} else {
5311 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5312 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5313 	}
5314 }
5315 
5316 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5317 {
5318 	struct amdgpu_device *adev = ring->adev;
5319 	u32 ref_and_mask, reg_mem_engine;
5320 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5321 
5322 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5323 		switch (ring->me) {
5324 		case 1:
5325 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5326 			break;
5327 		case 2:
5328 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5329 			break;
5330 		default:
5331 			return;
5332 		}
5333 		reg_mem_engine = 0;
5334 	} else {
5335 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5336 		reg_mem_engine = 1; /* pfp */
5337 	}
5338 
5339 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5340 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5341 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5342 			      ref_and_mask, ref_and_mask, 0x20);
5343 }
5344 
5345 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5346 					struct amdgpu_job *job,
5347 					struct amdgpu_ib *ib,
5348 					uint32_t flags)
5349 {
5350 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5351 	u32 header, control = 0;
5352 
5353 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5354 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5355 	else
5356 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5357 
5358 	control |= ib->length_dw | (vmid << 24);
5359 
5360 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5361 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5362 
5363 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5364 			gfx_v9_0_ring_emit_de_meta(ring);
5365 	}
5366 
5367 	amdgpu_ring_write(ring, header);
5368 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5369 	amdgpu_ring_write(ring,
5370 #ifdef __BIG_ENDIAN
5371 		(2 << 0) |
5372 #endif
5373 		lower_32_bits(ib->gpu_addr));
5374 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5375 	amdgpu_ring_write(ring, control);
5376 }
5377 
5378 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5379 					  struct amdgpu_job *job,
5380 					  struct amdgpu_ib *ib,
5381 					  uint32_t flags)
5382 {
5383 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5384 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5385 
5386 	/* Currently, there is a high possibility to get wave ID mismatch
5387 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5388 	 * different wave IDs than the GDS expects. This situation happens
5389 	 * randomly when at least 5 compute pipes use GDS ordered append.
5390 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5391 	 * Those are probably bugs somewhere else in the kernel driver.
5392 	 *
5393 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5394 	 * GDS to 0 for this ring (me/pipe).
5395 	 */
5396 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5397 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5398 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5399 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5400 	}
5401 
5402 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5403 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5404 	amdgpu_ring_write(ring,
5405 #ifdef __BIG_ENDIAN
5406 				(2 << 0) |
5407 #endif
5408 				lower_32_bits(ib->gpu_addr));
5409 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5410 	amdgpu_ring_write(ring, control);
5411 }
5412 
5413 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5414 				     u64 seq, unsigned flags)
5415 {
5416 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5417 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5418 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5419 
5420 	/* RELEASE_MEM - flush caches, send int */
5421 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5422 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5423 					       EOP_TC_NC_ACTION_EN) :
5424 					      (EOP_TCL1_ACTION_EN |
5425 					       EOP_TC_ACTION_EN |
5426 					       EOP_TC_WB_ACTION_EN |
5427 					       EOP_TC_MD_ACTION_EN)) |
5428 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5429 				 EVENT_INDEX(5)));
5430 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5431 
5432 	/*
5433 	 * the address should be Qword aligned if 64bit write, Dword
5434 	 * aligned if only send 32bit data low (discard data high)
5435 	 */
5436 	if (write64bit)
5437 		BUG_ON(addr & 0x7);
5438 	else
5439 		BUG_ON(addr & 0x3);
5440 	amdgpu_ring_write(ring, lower_32_bits(addr));
5441 	amdgpu_ring_write(ring, upper_32_bits(addr));
5442 	amdgpu_ring_write(ring, lower_32_bits(seq));
5443 	amdgpu_ring_write(ring, upper_32_bits(seq));
5444 	amdgpu_ring_write(ring, 0);
5445 }
5446 
5447 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5448 {
5449 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5450 	uint32_t seq = ring->fence_drv.sync_seq;
5451 	uint64_t addr = ring->fence_drv.gpu_addr;
5452 
5453 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5454 			      lower_32_bits(addr), upper_32_bits(addr),
5455 			      seq, 0xffffffff, 4);
5456 }
5457 
5458 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5459 					unsigned vmid, uint64_t pd_addr)
5460 {
5461 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5462 
5463 	/* compute doesn't have PFP */
5464 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5465 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5466 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5467 		amdgpu_ring_write(ring, 0x0);
5468 	}
5469 }
5470 
5471 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5472 {
5473 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5474 }
5475 
5476 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5477 {
5478 	u64 wptr;
5479 
5480 	/* XXX check if swapping is necessary on BE */
5481 	if (ring->use_doorbell)
5482 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5483 	else
5484 		BUG();
5485 	return wptr;
5486 }
5487 
5488 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5489 {
5490 	struct amdgpu_device *adev = ring->adev;
5491 
5492 	/* XXX check if swapping is necessary on BE */
5493 	if (ring->use_doorbell) {
5494 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5495 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5496 	} else{
5497 		BUG(); /* only DOORBELL method supported on gfx9 now */
5498 	}
5499 }
5500 
5501 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5502 					 u64 seq, unsigned int flags)
5503 {
5504 	struct amdgpu_device *adev = ring->adev;
5505 
5506 	/* we only allocate 32bit for each seq wb address */
5507 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5508 
5509 	/* write fence seq to the "addr" */
5510 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5511 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5512 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5513 	amdgpu_ring_write(ring, lower_32_bits(addr));
5514 	amdgpu_ring_write(ring, upper_32_bits(addr));
5515 	amdgpu_ring_write(ring, lower_32_bits(seq));
5516 
5517 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5518 		/* set register to trigger INT */
5519 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5520 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5521 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5522 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5523 		amdgpu_ring_write(ring, 0);
5524 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5525 	}
5526 }
5527 
5528 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5529 {
5530 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5531 	amdgpu_ring_write(ring, 0);
5532 }
5533 
5534 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5535 {
5536 	struct v9_ce_ib_state ce_payload = {0};
5537 	uint64_t csa_addr;
5538 	int cnt;
5539 
5540 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5541 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5542 
5543 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5544 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5545 				 WRITE_DATA_DST_SEL(8) |
5546 				 WR_CONFIRM) |
5547 				 WRITE_DATA_CACHE_POLICY(0));
5548 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5549 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5550 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5551 }
5552 
5553 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5554 {
5555 	struct v9_de_ib_state de_payload = {0};
5556 	uint64_t csa_addr, gds_addr;
5557 	int cnt;
5558 
5559 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5560 	gds_addr = csa_addr + 4096;
5561 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5562 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5563 
5564 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5565 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5566 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5567 				 WRITE_DATA_DST_SEL(8) |
5568 				 WR_CONFIRM) |
5569 				 WRITE_DATA_CACHE_POLICY(0));
5570 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5571 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5572 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5573 }
5574 
5575 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5576 				   bool secure)
5577 {
5578 	uint32_t v = secure ? FRAME_TMZ : 0;
5579 
5580 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5581 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5582 }
5583 
5584 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5585 {
5586 	uint32_t dw2 = 0;
5587 
5588 	if (amdgpu_sriov_vf(ring->adev))
5589 		gfx_v9_0_ring_emit_ce_meta(ring);
5590 
5591 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5592 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5593 		/* set load_global_config & load_global_uconfig */
5594 		dw2 |= 0x8001;
5595 		/* set load_cs_sh_regs */
5596 		dw2 |= 0x01000000;
5597 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5598 		dw2 |= 0x10002;
5599 
5600 		/* set load_ce_ram if preamble presented */
5601 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5602 			dw2 |= 0x10000000;
5603 	} else {
5604 		/* still load_ce_ram if this is the first time preamble presented
5605 		 * although there is no context switch happens.
5606 		 */
5607 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5608 			dw2 |= 0x10000000;
5609 	}
5610 
5611 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5612 	amdgpu_ring_write(ring, dw2);
5613 	amdgpu_ring_write(ring, 0);
5614 }
5615 
5616 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5617 {
5618 	unsigned ret;
5619 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5620 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5621 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5622 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5623 	ret = ring->wptr & ring->buf_mask;
5624 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5625 	return ret;
5626 }
5627 
5628 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5629 {
5630 	unsigned cur;
5631 	BUG_ON(offset > ring->buf_mask);
5632 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5633 
5634 	cur = (ring->wptr & ring->buf_mask) - 1;
5635 	if (likely(cur > offset))
5636 		ring->ring[offset] = cur - offset;
5637 	else
5638 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5639 }
5640 
5641 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5642 				    uint32_t reg_val_offs)
5643 {
5644 	struct amdgpu_device *adev = ring->adev;
5645 
5646 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5647 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5648 				(5 << 8) |	/* dst: memory */
5649 				(1 << 20));	/* write confirm */
5650 	amdgpu_ring_write(ring, reg);
5651 	amdgpu_ring_write(ring, 0);
5652 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5653 				reg_val_offs * 4));
5654 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5655 				reg_val_offs * 4));
5656 }
5657 
5658 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5659 				    uint32_t val)
5660 {
5661 	uint32_t cmd = 0;
5662 
5663 	switch (ring->funcs->type) {
5664 	case AMDGPU_RING_TYPE_GFX:
5665 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5666 		break;
5667 	case AMDGPU_RING_TYPE_KIQ:
5668 		cmd = (1 << 16); /* no inc addr */
5669 		break;
5670 	default:
5671 		cmd = WR_CONFIRM;
5672 		break;
5673 	}
5674 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5675 	amdgpu_ring_write(ring, cmd);
5676 	amdgpu_ring_write(ring, reg);
5677 	amdgpu_ring_write(ring, 0);
5678 	amdgpu_ring_write(ring, val);
5679 }
5680 
5681 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5682 					uint32_t val, uint32_t mask)
5683 {
5684 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5685 }
5686 
5687 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5688 						  uint32_t reg0, uint32_t reg1,
5689 						  uint32_t ref, uint32_t mask)
5690 {
5691 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5692 	struct amdgpu_device *adev = ring->adev;
5693 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5694 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5695 
5696 	if (fw_version_ok)
5697 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5698 				      ref, mask, 0x20);
5699 	else
5700 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5701 							   ref, mask);
5702 }
5703 
5704 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5705 {
5706 	struct amdgpu_device *adev = ring->adev;
5707 	uint32_t value = 0;
5708 
5709 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5710 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5711 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5712 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5713 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5714 }
5715 
5716 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5717 						 enum amdgpu_interrupt_state state)
5718 {
5719 	switch (state) {
5720 	case AMDGPU_IRQ_STATE_DISABLE:
5721 	case AMDGPU_IRQ_STATE_ENABLE:
5722 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5723 			       TIME_STAMP_INT_ENABLE,
5724 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5725 		break;
5726 	default:
5727 		break;
5728 	}
5729 }
5730 
5731 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5732 						     int me, int pipe,
5733 						     enum amdgpu_interrupt_state state)
5734 {
5735 	u32 mec_int_cntl, mec_int_cntl_reg;
5736 
5737 	/*
5738 	 * amdgpu controls only the first MEC. That's why this function only
5739 	 * handles the setting of interrupts for this specific MEC. All other
5740 	 * pipes' interrupts are set by amdkfd.
5741 	 */
5742 
5743 	if (me == 1) {
5744 		switch (pipe) {
5745 		case 0:
5746 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5747 			break;
5748 		case 1:
5749 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5750 			break;
5751 		case 2:
5752 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5753 			break;
5754 		case 3:
5755 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5756 			break;
5757 		default:
5758 			DRM_DEBUG("invalid pipe %d\n", pipe);
5759 			return;
5760 		}
5761 	} else {
5762 		DRM_DEBUG("invalid me %d\n", me);
5763 		return;
5764 	}
5765 
5766 	switch (state) {
5767 	case AMDGPU_IRQ_STATE_DISABLE:
5768 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5769 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5770 					     TIME_STAMP_INT_ENABLE, 0);
5771 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5772 		break;
5773 	case AMDGPU_IRQ_STATE_ENABLE:
5774 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5775 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5776 					     TIME_STAMP_INT_ENABLE, 1);
5777 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5778 		break;
5779 	default:
5780 		break;
5781 	}
5782 }
5783 
5784 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5785 					     struct amdgpu_irq_src *source,
5786 					     unsigned type,
5787 					     enum amdgpu_interrupt_state state)
5788 {
5789 	switch (state) {
5790 	case AMDGPU_IRQ_STATE_DISABLE:
5791 	case AMDGPU_IRQ_STATE_ENABLE:
5792 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5793 			       PRIV_REG_INT_ENABLE,
5794 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5795 		break;
5796 	default:
5797 		break;
5798 	}
5799 
5800 	return 0;
5801 }
5802 
5803 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5804 					      struct amdgpu_irq_src *source,
5805 					      unsigned type,
5806 					      enum amdgpu_interrupt_state state)
5807 {
5808 	switch (state) {
5809 	case AMDGPU_IRQ_STATE_DISABLE:
5810 	case AMDGPU_IRQ_STATE_ENABLE:
5811 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5812 			       PRIV_INSTR_INT_ENABLE,
5813 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5814 		break;
5815 	default:
5816 		break;
5817 	}
5818 
5819 	return 0;
5820 }
5821 
5822 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5823 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5824 			CP_ECC_ERROR_INT_ENABLE, 1)
5825 
5826 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5827 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5828 			CP_ECC_ERROR_INT_ENABLE, 0)
5829 
5830 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5831 					      struct amdgpu_irq_src *source,
5832 					      unsigned type,
5833 					      enum amdgpu_interrupt_state state)
5834 {
5835 	switch (state) {
5836 	case AMDGPU_IRQ_STATE_DISABLE:
5837 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5838 				CP_ECC_ERROR_INT_ENABLE, 0);
5839 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5840 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5841 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5842 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5843 		break;
5844 
5845 	case AMDGPU_IRQ_STATE_ENABLE:
5846 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5847 				CP_ECC_ERROR_INT_ENABLE, 1);
5848 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5849 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5850 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5851 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5852 		break;
5853 	default:
5854 		break;
5855 	}
5856 
5857 	return 0;
5858 }
5859 
5860 
5861 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5862 					    struct amdgpu_irq_src *src,
5863 					    unsigned type,
5864 					    enum amdgpu_interrupt_state state)
5865 {
5866 	switch (type) {
5867 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5868 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5869 		break;
5870 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5871 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5872 		break;
5873 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5874 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5875 		break;
5876 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5877 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5878 		break;
5879 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5880 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5881 		break;
5882 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5883 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5884 		break;
5885 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5886 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5887 		break;
5888 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5889 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5890 		break;
5891 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5892 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5893 		break;
5894 	default:
5895 		break;
5896 	}
5897 	return 0;
5898 }
5899 
5900 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5901 			    struct amdgpu_irq_src *source,
5902 			    struct amdgpu_iv_entry *entry)
5903 {
5904 	int i;
5905 	u8 me_id, pipe_id, queue_id;
5906 	struct amdgpu_ring *ring;
5907 
5908 	DRM_DEBUG("IH: CP EOP\n");
5909 	me_id = (entry->ring_id & 0x0c) >> 2;
5910 	pipe_id = (entry->ring_id & 0x03) >> 0;
5911 	queue_id = (entry->ring_id & 0x70) >> 4;
5912 
5913 	switch (me_id) {
5914 	case 0:
5915 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5916 		break;
5917 	case 1:
5918 	case 2:
5919 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5920 			ring = &adev->gfx.compute_ring[i];
5921 			/* Per-queue interrupt is supported for MEC starting from VI.
5922 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5923 			  */
5924 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5925 				amdgpu_fence_process(ring);
5926 		}
5927 		break;
5928 	}
5929 	return 0;
5930 }
5931 
5932 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5933 			   struct amdgpu_iv_entry *entry)
5934 {
5935 	u8 me_id, pipe_id, queue_id;
5936 	struct amdgpu_ring *ring;
5937 	int i;
5938 
5939 	me_id = (entry->ring_id & 0x0c) >> 2;
5940 	pipe_id = (entry->ring_id & 0x03) >> 0;
5941 	queue_id = (entry->ring_id & 0x70) >> 4;
5942 
5943 	switch (me_id) {
5944 	case 0:
5945 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5946 		break;
5947 	case 1:
5948 	case 2:
5949 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5950 			ring = &adev->gfx.compute_ring[i];
5951 			if (ring->me == me_id && ring->pipe == pipe_id &&
5952 			    ring->queue == queue_id)
5953 				drm_sched_fault(&ring->sched);
5954 		}
5955 		break;
5956 	}
5957 }
5958 
5959 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5960 				 struct amdgpu_irq_src *source,
5961 				 struct amdgpu_iv_entry *entry)
5962 {
5963 	DRM_ERROR("Illegal register access in command stream\n");
5964 	gfx_v9_0_fault(adev, entry);
5965 	return 0;
5966 }
5967 
5968 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5969 				  struct amdgpu_irq_src *source,
5970 				  struct amdgpu_iv_entry *entry)
5971 {
5972 	DRM_ERROR("Illegal instruction in command stream\n");
5973 	gfx_v9_0_fault(adev, entry);
5974 	return 0;
5975 }
5976 
5977 
5978 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5979 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5980 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5981 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5982 	},
5983 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5984 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5985 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5986 	},
5987 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5988 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5989 	  0, 0
5990 	},
5991 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5992 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5993 	  0, 0
5994 	},
5995 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5996 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5997 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5998 	},
5999 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6000 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6001 	  0, 0
6002 	},
6003 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6004 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6005 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6006 	},
6007 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6008 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6009 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6010 	},
6011 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6012 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6013 	  0, 0
6014 	},
6015 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6016 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6017 	  0, 0
6018 	},
6019 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6020 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6021 	  0, 0
6022 	},
6023 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6024 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6025 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6026 	},
6027 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6028 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6029 	  0, 0
6030 	},
6031 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6032 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6033 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6034 	},
6035 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6036 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6037 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6038 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6039 	},
6040 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6041 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6042 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6043 	  0, 0
6044 	},
6045 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6046 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6047 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6048 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6049 	},
6050 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6051 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6052 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6053 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6054 	},
6055 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6056 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6057 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6058 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6059 	},
6060 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6061 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6062 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6063 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6064 	},
6065 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6066 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6067 	  0, 0
6068 	},
6069 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6070 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6071 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6072 	},
6073 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6074 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6075 	  0, 0
6076 	},
6077 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6078 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6079 	  0, 0
6080 	},
6081 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6082 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6083 	  0, 0
6084 	},
6085 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6086 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6087 	  0, 0
6088 	},
6089 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6090 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6091 	  0, 0
6092 	},
6093 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6094 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6095 	  0, 0
6096 	},
6097 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6098 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6099 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6100 	},
6101 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6102 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6103 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6104 	},
6105 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6106 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6107 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6108 	},
6109 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6110 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6111 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6112 	},
6113 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6114 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6115 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6116 	},
6117 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6118 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6119 	  0, 0
6120 	},
6121 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6122 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6123 	  0, 0
6124 	},
6125 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6126 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6127 	  0, 0
6128 	},
6129 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6130 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6131 	  0, 0
6132 	},
6133 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6134 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6135 	  0, 0
6136 	},
6137 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6138 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6139 	  0, 0
6140 	},
6141 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6142 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6143 	  0, 0
6144 	},
6145 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6146 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6147 	  0, 0
6148 	},
6149 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6150 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6151 	  0, 0
6152 	},
6153 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6154 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6155 	  0, 0
6156 	},
6157 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6158 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6159 	  0, 0
6160 	},
6161 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6162 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6163 	  0, 0
6164 	},
6165 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6166 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6167 	  0, 0
6168 	},
6169 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6170 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6171 	  0, 0
6172 	},
6173 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6174 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6175 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6176 	},
6177 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6178 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6179 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6180 	},
6181 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6182 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6183 	  0, 0
6184 	},
6185 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6186 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6187 	  0, 0
6188 	},
6189 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6190 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6191 	  0, 0
6192 	},
6193 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6194 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6195 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6196 	},
6197 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6198 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6199 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6200 	},
6201 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6202 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6203 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6204 	},
6205 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6206 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6207 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6208 	},
6209 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6210 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6211 	  0, 0
6212 	},
6213 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6214 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6215 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6216 	},
6217 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6218 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6219 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6220 	},
6221 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6222 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6223 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6224 	},
6225 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6226 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6227 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6228 	},
6229 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6230 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6231 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6232 	},
6233 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6234 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6235 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6236 	},
6237 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6238 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6239 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6240 	},
6241 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6242 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6243 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6244 	},
6245 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6246 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6247 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6248 	},
6249 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6250 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6251 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6252 	},
6253 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6254 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6255 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6256 	},
6257 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6258 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6259 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6260 	},
6261 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6262 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6263 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6264 	},
6265 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6266 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6267 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6268 	},
6269 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6270 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6271 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6272 	},
6273 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6274 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6275 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6276 	},
6277 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6278 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6279 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6280 	},
6281 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6282 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6283 	  0, 0
6284 	},
6285 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6286 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6287 	  0, 0
6288 	},
6289 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6290 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6291 	  0, 0
6292 	},
6293 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6294 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6295 	  0, 0
6296 	},
6297 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6298 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6299 	  0, 0
6300 	},
6301 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6302 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6303 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6304 	},
6305 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6306 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6307 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6308 	},
6309 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6310 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6311 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6312 	},
6313 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6314 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6315 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6316 	},
6317 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6318 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6319 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6320 	},
6321 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6322 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6323 	  0, 0
6324 	},
6325 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6326 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6327 	  0, 0
6328 	},
6329 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6330 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6331 	  0, 0
6332 	},
6333 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6334 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6335 	  0, 0
6336 	},
6337 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6338 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6339 	  0, 0
6340 	},
6341 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6342 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6343 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6344 	},
6345 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6346 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6347 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6348 	},
6349 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6350 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6351 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6352 	},
6353 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6354 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6355 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6356 	},
6357 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6358 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6359 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6360 	},
6361 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6362 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6363 	  0, 0
6364 	},
6365 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6366 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6367 	  0, 0
6368 	},
6369 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6370 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6371 	  0, 0
6372 	},
6373 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6374 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6375 	  0, 0
6376 	},
6377 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6378 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6379 	  0, 0
6380 	},
6381 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6382 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6383 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6384 	},
6385 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6386 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6387 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6388 	},
6389 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6390 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6391 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6392 	},
6393 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6394 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6395 	  0, 0
6396 	},
6397 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6398 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6399 	  0, 0
6400 	},
6401 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6402 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6403 	  0, 0
6404 	},
6405 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6406 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6407 	  0, 0
6408 	},
6409 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6410 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6411 	  0, 0
6412 	},
6413 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6414 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6415 	  0, 0
6416 	}
6417 };
6418 
6419 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6420 				     void *inject_if)
6421 {
6422 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6423 	int ret;
6424 	struct ta_ras_trigger_error_input block_info = { 0 };
6425 
6426 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6427 		return -EINVAL;
6428 
6429 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6430 		return -EINVAL;
6431 
6432 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6433 		return -EPERM;
6434 
6435 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6436 	      info->head.type)) {
6437 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6438 			ras_gfx_subblocks[info->head.sub_block_index].name,
6439 			info->head.type);
6440 		return -EPERM;
6441 	}
6442 
6443 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6444 	      info->head.type)) {
6445 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6446 			ras_gfx_subblocks[info->head.sub_block_index].name,
6447 			info->head.type);
6448 		return -EPERM;
6449 	}
6450 
6451 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6452 	block_info.sub_block_index =
6453 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6454 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6455 	block_info.address = info->address;
6456 	block_info.value = info->value;
6457 
6458 	mutex_lock(&adev->grbm_idx_mutex);
6459 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6460 	mutex_unlock(&adev->grbm_idx_mutex);
6461 
6462 	return ret;
6463 }
6464 
6465 static const char *vml2_mems[] = {
6466 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6467 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6468 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6469 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6470 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6471 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6472 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6473 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6474 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6475 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6476 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6477 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6478 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6479 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6480 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6481 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6482 };
6483 
6484 static const char *vml2_walker_mems[] = {
6485 	"UTC_VML2_CACHE_PDE0_MEM0",
6486 	"UTC_VML2_CACHE_PDE0_MEM1",
6487 	"UTC_VML2_CACHE_PDE1_MEM0",
6488 	"UTC_VML2_CACHE_PDE1_MEM1",
6489 	"UTC_VML2_CACHE_PDE2_MEM0",
6490 	"UTC_VML2_CACHE_PDE2_MEM1",
6491 	"UTC_VML2_RDIF_LOG_FIFO",
6492 };
6493 
6494 static const char *atc_l2_cache_2m_mems[] = {
6495 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6496 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6497 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6498 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6499 };
6500 
6501 static const char *atc_l2_cache_4k_mems[] = {
6502 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6503 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6504 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6505 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6506 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6507 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6508 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6509 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6510 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6511 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6512 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6513 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6514 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6515 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6516 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6517 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6518 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6519 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6520 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6521 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6522 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6523 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6524 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6525 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6526 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6527 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6528 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6529 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6530 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6531 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6532 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6533 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6534 };
6535 
6536 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6537 					 struct ras_err_data *err_data)
6538 {
6539 	uint32_t i, data;
6540 	uint32_t sec_count, ded_count;
6541 
6542 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6543 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6544 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6545 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6546 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6547 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6548 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6549 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6550 
6551 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6552 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6553 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6554 
6555 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6556 		if (sec_count) {
6557 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6558 				"SEC %d\n", i, vml2_mems[i], sec_count);
6559 			err_data->ce_count += sec_count;
6560 		}
6561 
6562 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6563 		if (ded_count) {
6564 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6565 				"DED %d\n", i, vml2_mems[i], ded_count);
6566 			err_data->ue_count += ded_count;
6567 		}
6568 	}
6569 
6570 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6571 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6572 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6573 
6574 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6575 						SEC_COUNT);
6576 		if (sec_count) {
6577 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6578 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6579 			err_data->ce_count += sec_count;
6580 		}
6581 
6582 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6583 						DED_COUNT);
6584 		if (ded_count) {
6585 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6586 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6587 			err_data->ue_count += ded_count;
6588 		}
6589 	}
6590 
6591 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6592 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6593 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6594 
6595 		sec_count = (data & 0x00006000L) >> 0xd;
6596 		if (sec_count) {
6597 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6598 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6599 				sec_count);
6600 			err_data->ce_count += sec_count;
6601 		}
6602 	}
6603 
6604 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6605 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6606 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6607 
6608 		sec_count = (data & 0x00006000L) >> 0xd;
6609 		if (sec_count) {
6610 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6611 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6612 				sec_count);
6613 			err_data->ce_count += sec_count;
6614 		}
6615 
6616 		ded_count = (data & 0x00018000L) >> 0xf;
6617 		if (ded_count) {
6618 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6619 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6620 				ded_count);
6621 			err_data->ue_count += ded_count;
6622 		}
6623 	}
6624 
6625 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6626 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6627 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6628 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6629 
6630 	return 0;
6631 }
6632 
6633 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6634 	const struct soc15_reg_entry *reg,
6635 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6636 	uint32_t *sec_count, uint32_t *ded_count)
6637 {
6638 	uint32_t i;
6639 	uint32_t sec_cnt, ded_cnt;
6640 
6641 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6642 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6643 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6644 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6645 			continue;
6646 
6647 		sec_cnt = (value &
6648 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6649 				gfx_v9_0_ras_fields[i].sec_count_shift;
6650 		if (sec_cnt) {
6651 			dev_info(adev->dev, "GFX SubBlock %s, "
6652 				"Instance[%d][%d], SEC %d\n",
6653 				gfx_v9_0_ras_fields[i].name,
6654 				se_id, inst_id,
6655 				sec_cnt);
6656 			*sec_count += sec_cnt;
6657 		}
6658 
6659 		ded_cnt = (value &
6660 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6661 				gfx_v9_0_ras_fields[i].ded_count_shift;
6662 		if (ded_cnt) {
6663 			dev_info(adev->dev, "GFX SubBlock %s, "
6664 				"Instance[%d][%d], DED %d\n",
6665 				gfx_v9_0_ras_fields[i].name,
6666 				se_id, inst_id,
6667 				ded_cnt);
6668 			*ded_count += ded_cnt;
6669 		}
6670 	}
6671 
6672 	return 0;
6673 }
6674 
6675 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6676 {
6677 	int i, j, k;
6678 
6679 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6680 		return;
6681 
6682 	/* read back registers to clear the counters */
6683 	mutex_lock(&adev->grbm_idx_mutex);
6684 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6685 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6686 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6687 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6688 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6689 			}
6690 		}
6691 	}
6692 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6693 	mutex_unlock(&adev->grbm_idx_mutex);
6694 
6695 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6696 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6697 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6698 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6699 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6700 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6701 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6702 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6703 
6704 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6705 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6706 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6707 	}
6708 
6709 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6710 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6711 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6712 	}
6713 
6714 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6715 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6716 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6717 	}
6718 
6719 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6720 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6721 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6722 	}
6723 
6724 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6725 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6726 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6727 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6728 }
6729 
6730 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6731 					  void *ras_error_status)
6732 {
6733 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6734 	uint32_t sec_count = 0, ded_count = 0;
6735 	uint32_t i, j, k;
6736 	uint32_t reg_value;
6737 
6738 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6739 		return -EINVAL;
6740 
6741 	err_data->ue_count = 0;
6742 	err_data->ce_count = 0;
6743 
6744 	mutex_lock(&adev->grbm_idx_mutex);
6745 
6746 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6747 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6748 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6749 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6750 				reg_value =
6751 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6752 				if (reg_value)
6753 					gfx_v9_0_ras_error_count(adev,
6754 						&gfx_v9_0_edc_counter_regs[i],
6755 						j, k, reg_value,
6756 						&sec_count, &ded_count);
6757 			}
6758 		}
6759 	}
6760 
6761 	err_data->ce_count += sec_count;
6762 	err_data->ue_count += ded_count;
6763 
6764 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6765 	mutex_unlock(&adev->grbm_idx_mutex);
6766 
6767 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6768 
6769 	return 0;
6770 }
6771 
6772 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6773 {
6774 	const unsigned int cp_coher_cntl =
6775 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6776 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6777 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6778 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6779 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6780 
6781 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6782 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6783 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6784 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6785 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6786 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6787 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6788 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6789 }
6790 
6791 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6792 					uint32_t pipe, bool enable)
6793 {
6794 	struct amdgpu_device *adev = ring->adev;
6795 	uint32_t val;
6796 	uint32_t wcl_cs_reg;
6797 
6798 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6799 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6800 
6801 	switch (pipe) {
6802 	case 0:
6803 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6804 		break;
6805 	case 1:
6806 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6807 		break;
6808 	case 2:
6809 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6810 		break;
6811 	case 3:
6812 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6813 		break;
6814 	default:
6815 		DRM_DEBUG("invalid pipe %d\n", pipe);
6816 		return;
6817 	}
6818 
6819 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6820 
6821 }
6822 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6823 {
6824 	struct amdgpu_device *adev = ring->adev;
6825 	uint32_t val;
6826 	int i;
6827 
6828 
6829 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6830 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6831 	 * around 25% of gpu resources.
6832 	 */
6833 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6834 	amdgpu_ring_emit_wreg(ring,
6835 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6836 			      val);
6837 
6838 	/* Restrict waves for normal/low priority compute queues as well
6839 	 * to get best QoS for high priority compute jobs.
6840 	 *
6841 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6842 	 */
6843 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6844 		if (i != ring->pipe)
6845 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6846 
6847 	}
6848 }
6849 
6850 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6851 	.name = "gfx_v9_0",
6852 	.early_init = gfx_v9_0_early_init,
6853 	.late_init = gfx_v9_0_late_init,
6854 	.sw_init = gfx_v9_0_sw_init,
6855 	.sw_fini = gfx_v9_0_sw_fini,
6856 	.hw_init = gfx_v9_0_hw_init,
6857 	.hw_fini = gfx_v9_0_hw_fini,
6858 	.suspend = gfx_v9_0_suspend,
6859 	.resume = gfx_v9_0_resume,
6860 	.is_idle = gfx_v9_0_is_idle,
6861 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6862 	.soft_reset = gfx_v9_0_soft_reset,
6863 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6864 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6865 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6866 };
6867 
6868 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6869 	.type = AMDGPU_RING_TYPE_GFX,
6870 	.align_mask = 0xff,
6871 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6872 	.support_64bit_ptrs = true,
6873 	.vmhub = AMDGPU_GFXHUB_0,
6874 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6875 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6876 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6877 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6878 		5 +  /* COND_EXEC */
6879 		7 +  /* PIPELINE_SYNC */
6880 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6881 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6882 		2 + /* VM_FLUSH */
6883 		8 +  /* FENCE for VM_FLUSH */
6884 		20 + /* GDS switch */
6885 		4 + /* double SWITCH_BUFFER,
6886 		       the first COND_EXEC jump to the place just
6887 			   prior to this double SWITCH_BUFFER  */
6888 		5 + /* COND_EXEC */
6889 		7 +	 /*	HDP_flush */
6890 		4 +	 /*	VGT_flush */
6891 		14 + /*	CE_META */
6892 		31 + /*	DE_META */
6893 		3 + /* CNTX_CTRL */
6894 		5 + /* HDP_INVL */
6895 		8 + 8 + /* FENCE x2 */
6896 		2 + /* SWITCH_BUFFER */
6897 		7, /* gfx_v9_0_emit_mem_sync */
6898 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6899 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6900 	.emit_fence = gfx_v9_0_ring_emit_fence,
6901 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6902 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6903 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6904 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6905 	.test_ring = gfx_v9_0_ring_test_ring,
6906 	.test_ib = gfx_v9_0_ring_test_ib,
6907 	.insert_nop = amdgpu_ring_insert_nop,
6908 	.pad_ib = amdgpu_ring_generic_pad_ib,
6909 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6910 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6911 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6912 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6913 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6914 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6915 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6916 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6917 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6918 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6919 };
6920 
6921 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6922 	.type = AMDGPU_RING_TYPE_COMPUTE,
6923 	.align_mask = 0xff,
6924 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6925 	.support_64bit_ptrs = true,
6926 	.vmhub = AMDGPU_GFXHUB_0,
6927 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6928 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6929 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6930 	.emit_frame_size =
6931 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6932 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6933 		5 + /* hdp invalidate */
6934 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6935 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6936 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6937 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6938 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6939 		7 + /* gfx_v9_0_emit_mem_sync */
6940 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6941 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6942 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6943 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6944 	.emit_fence = gfx_v9_0_ring_emit_fence,
6945 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6946 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6947 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6948 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6949 	.test_ring = gfx_v9_0_ring_test_ring,
6950 	.test_ib = gfx_v9_0_ring_test_ib,
6951 	.insert_nop = amdgpu_ring_insert_nop,
6952 	.pad_ib = amdgpu_ring_generic_pad_ib,
6953 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6954 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6955 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6956 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6957 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6958 };
6959 
6960 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6961 	.type = AMDGPU_RING_TYPE_KIQ,
6962 	.align_mask = 0xff,
6963 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6964 	.support_64bit_ptrs = true,
6965 	.vmhub = AMDGPU_GFXHUB_0,
6966 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6967 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6968 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6969 	.emit_frame_size =
6970 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6971 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6972 		5 + /* hdp invalidate */
6973 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6974 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6975 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6976 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6977 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6978 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6979 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6980 	.test_ring = gfx_v9_0_ring_test_ring,
6981 	.insert_nop = amdgpu_ring_insert_nop,
6982 	.pad_ib = amdgpu_ring_generic_pad_ib,
6983 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6984 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6985 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6986 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6987 };
6988 
6989 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6990 {
6991 	int i;
6992 
6993 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6994 
6995 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6996 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6997 
6998 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6999 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7000 }
7001 
7002 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7003 	.set = gfx_v9_0_set_eop_interrupt_state,
7004 	.process = gfx_v9_0_eop_irq,
7005 };
7006 
7007 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7008 	.set = gfx_v9_0_set_priv_reg_fault_state,
7009 	.process = gfx_v9_0_priv_reg_irq,
7010 };
7011 
7012 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7013 	.set = gfx_v9_0_set_priv_inst_fault_state,
7014 	.process = gfx_v9_0_priv_inst_irq,
7015 };
7016 
7017 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7018 	.set = gfx_v9_0_set_cp_ecc_error_state,
7019 	.process = amdgpu_gfx_cp_ecc_error_irq,
7020 };
7021 
7022 
7023 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7024 {
7025 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7026 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7027 
7028 	adev->gfx.priv_reg_irq.num_types = 1;
7029 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7030 
7031 	adev->gfx.priv_inst_irq.num_types = 1;
7032 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7033 
7034 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7035 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7036 }
7037 
7038 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7039 {
7040 	switch (adev->ip_versions[GC_HWIP][0]) {
7041 	case IP_VERSION(9, 0, 1):
7042 	case IP_VERSION(9, 2, 1):
7043 	case IP_VERSION(9, 4, 0):
7044 	case IP_VERSION(9, 2, 2):
7045 	case IP_VERSION(9, 1, 0):
7046 	case IP_VERSION(9, 4, 1):
7047 	case IP_VERSION(9, 3, 0):
7048 	case IP_VERSION(9, 4, 2):
7049 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7050 		break;
7051 	default:
7052 		break;
7053 	}
7054 }
7055 
7056 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7057 {
7058 	/* init asci gds info */
7059 	switch (adev->ip_versions[GC_HWIP][0]) {
7060 	case IP_VERSION(9, 0, 1):
7061 	case IP_VERSION(9, 2, 1):
7062 	case IP_VERSION(9, 4, 0):
7063 		adev->gds.gds_size = 0x10000;
7064 		break;
7065 	case IP_VERSION(9, 2, 2):
7066 	case IP_VERSION(9, 1, 0):
7067 	case IP_VERSION(9, 4, 1):
7068 		adev->gds.gds_size = 0x1000;
7069 		break;
7070 	case IP_VERSION(9, 4, 2):
7071 		/* aldebaran removed all the GDS internal memory,
7072 		 * only support GWS opcode in kernel, like barrier
7073 		 * semaphore.etc */
7074 		adev->gds.gds_size = 0;
7075 		break;
7076 	default:
7077 		adev->gds.gds_size = 0x10000;
7078 		break;
7079 	}
7080 
7081 	switch (adev->ip_versions[GC_HWIP][0]) {
7082 	case IP_VERSION(9, 0, 1):
7083 	case IP_VERSION(9, 4, 0):
7084 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7085 		break;
7086 	case IP_VERSION(9, 2, 1):
7087 		adev->gds.gds_compute_max_wave_id = 0x27f;
7088 		break;
7089 	case IP_VERSION(9, 2, 2):
7090 	case IP_VERSION(9, 1, 0):
7091 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7092 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7093 		else
7094 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7095 		break;
7096 	case IP_VERSION(9, 4, 1):
7097 		adev->gds.gds_compute_max_wave_id = 0xfff;
7098 		break;
7099 	case IP_VERSION(9, 4, 2):
7100 		/* deprecated for Aldebaran, no usage at all */
7101 		adev->gds.gds_compute_max_wave_id = 0;
7102 		break;
7103 	default:
7104 		/* this really depends on the chip */
7105 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7106 		break;
7107 	}
7108 
7109 	adev->gds.gws_size = 64;
7110 	adev->gds.oa_size = 16;
7111 }
7112 
7113 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7114 						 u32 bitmap)
7115 {
7116 	u32 data;
7117 
7118 	if (!bitmap)
7119 		return;
7120 
7121 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7122 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7123 
7124 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7125 }
7126 
7127 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7128 {
7129 	u32 data, mask;
7130 
7131 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7132 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7133 
7134 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7135 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7136 
7137 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7138 
7139 	return (~data) & mask;
7140 }
7141 
7142 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7143 				 struct amdgpu_cu_info *cu_info)
7144 {
7145 	int i, j, k, counter, active_cu_number = 0;
7146 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7147 	unsigned disable_masks[4 * 4];
7148 
7149 	if (!adev || !cu_info)
7150 		return -EINVAL;
7151 
7152 	/*
7153 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7154 	 */
7155 	if (adev->gfx.config.max_shader_engines *
7156 		adev->gfx.config.max_sh_per_se > 16)
7157 		return -EINVAL;
7158 
7159 	amdgpu_gfx_parse_disable_cu(disable_masks,
7160 				    adev->gfx.config.max_shader_engines,
7161 				    adev->gfx.config.max_sh_per_se);
7162 
7163 	mutex_lock(&adev->grbm_idx_mutex);
7164 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7165 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7166 			mask = 1;
7167 			ao_bitmap = 0;
7168 			counter = 0;
7169 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7170 			gfx_v9_0_set_user_cu_inactive_bitmap(
7171 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7172 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7173 
7174 			/*
7175 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7176 			 * 4x4 size array, and it's usually suitable for Vega
7177 			 * ASICs which has 4*2 SE/SH layout.
7178 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7179 			 * To mostly reduce the impact, we make it compatible
7180 			 * with current bitmap array as below:
7181 			 *    SE4,SH0 --> bitmap[0][1]
7182 			 *    SE5,SH0 --> bitmap[1][1]
7183 			 *    SE6,SH0 --> bitmap[2][1]
7184 			 *    SE7,SH0 --> bitmap[3][1]
7185 			 */
7186 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7187 
7188 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7189 				if (bitmap & mask) {
7190 					if (counter < adev->gfx.config.max_cu_per_sh)
7191 						ao_bitmap |= mask;
7192 					counter ++;
7193 				}
7194 				mask <<= 1;
7195 			}
7196 			active_cu_number += counter;
7197 			if (i < 2 && j < 2)
7198 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7199 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7200 		}
7201 	}
7202 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7203 	mutex_unlock(&adev->grbm_idx_mutex);
7204 
7205 	cu_info->number = active_cu_number;
7206 	cu_info->ao_cu_mask = ao_cu_mask;
7207 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7208 
7209 	return 0;
7210 }
7211 
7212 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7213 {
7214 	.type = AMD_IP_BLOCK_TYPE_GFX,
7215 	.major = 9,
7216 	.minor = 0,
7217 	.rev = 0,
7218 	.funcs = &gfx_v9_0_ip_funcs,
7219 };
7220