xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision c6fbbf1e)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
144 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
145 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
146 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
147 
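/* Sub-block indices for GFX RAS error injection and query (ta_ras interface) */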
148 enum ta_ras_gfx_subblock {
149 	/*CPC*/
150 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 	TA_RAS_BLOCK__GFX_CPC_UCODE,
153 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
160 	/* CPF*/
161 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 	TA_RAS_BLOCK__GFX_CPF_TAG,
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
166 	/* CPG*/
167 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 	TA_RAS_BLOCK__GFX_CPG_TAG,
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
172 	/* GDS*/
173 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
180 	/* SPI*/
181 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
182 	/* SQ*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
189 	/* SQC (3 ranges)*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	/* SQC range 0*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	/* SQC range 1*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	/* SQC range 2*/
218 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
232 	/* TA*/
233 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
240 	/* TCA*/
241 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 	/* TCC (5 sub-ranges)*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	/* TCC range 0*/
248 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
258 	/* TCC range 1*/
259 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	/* TCC range 2*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	/* TCC range 3*/
277 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	/* TCC range 4*/
283 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
290 	/* TCI*/
291 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
292 	/* TCP*/
293 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
302 	/* TD*/
303 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 	/* EA (3 sub-ranges)*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	/* EA range 0*/
311 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
321 	/* EA range 1*/
322 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
331 	/* EA range 2*/
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
339 	/* UTC VM L2 bank*/
340 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
341 	/* UTC VM walker*/
342 	TA_RAS_BLOCK__UTC_VML2_WALKER,
343 	/* UTC ATC L2 2MB cache*/
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 	/* UTC ATC L2 4KB cache*/
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
347 	TA_RAS_BLOCK__GFX_MAX
348 };
349 
350 struct ras_gfx_subblock {
351 	unsigned char *name;
352 	int ta_subblock;
353 	int hw_supported_error_type;
354 	int sw_supported_error_type;
355 };
356 
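/*
 * Build one ras_gfx_subblocks[] entry: map the AMDGPU_RAS sub-block id to its
 * TA counterpart and pack the eight flag arguments into the hw_/sw_supported
 * error-type bitfields.
 */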
357 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
358 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
359 		#subblock,                                                     \
360 		TA_RAS_BLOCK__##subblock,                                      \
361 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
362 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
363 	}
364 
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 			     0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 			     1),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 			     0, 0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 			     0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 			     0, 0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 			     0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 			     0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 			     0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 			     0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514 
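/* Golden register settings, applied through soc15_program_register_sequence() */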
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538 
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560 
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575 
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603 
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614 
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
616 {
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
636 };
637 
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
639 {
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
652 };
653 
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
655 {
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
659 };
660 
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
662 {
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
679 };
680 
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
682 {
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
696 };
697 
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
699 {
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
711 };
712 
713 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
714 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
715 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
716 };
717 
718 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
719 {
720 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 };
729 
730 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
731 {
732 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 };
741 
742 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
743 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
744 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
745 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
746 
747 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
748 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
749 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
750 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
751 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
752 				struct amdgpu_cu_info *cu_info);
753 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
754 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
755 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
756 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
757 					  void *ras_error_status);
758 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
759 				     void *inject_if);
760 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
761 
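/* Tell the KIQ which compute queues it manages via a SET_RESOURCES packet */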
762 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
763 				uint64_t queue_mask)
764 {
765 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
766 	amdgpu_ring_write(kiq_ring,
767 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
768 		/* vmid_mask:0 queue_type:0 (KIQ) */
769 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
770 	amdgpu_ring_write(kiq_ring,
771 			lower_32_bits(queue_mask));	/* queue mask lo */
772 	amdgpu_ring_write(kiq_ring,
773 			upper_32_bits(queue_mask));	/* queue mask hi */
774 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
775 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
776 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
777 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
778 }
779 
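/*
 * Map a ring's hardware queue through the KIQ: the MAP_QUEUES packet carries
 * the queue/pipe/ME selection, doorbell offset, MQD address and wptr address.
 */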
780 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
781 				 struct amdgpu_ring *ring)
782 {
783 	struct amdgpu_device *adev = kiq_ring->adev;
784 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
785 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
786 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
787 
788 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
789 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
790 	amdgpu_ring_write(kiq_ring,
791 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
792 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
793 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
794 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
795 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
796 			 /*queue_type: normal compute queue */
797 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
798 			 /* alloc format: all_on_one_pipe */
799 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
800 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
801 			 /* num_queues: must be 1 */
802 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
803 	amdgpu_ring_write(kiq_ring,
804 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
805 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
806 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
807 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
808 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
809 }
810 
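/* Unmap (or preempt) a ring's queue through the KIQ with an UNMAP_QUEUES packet */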
811 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
812 				   struct amdgpu_ring *ring,
813 				   enum amdgpu_unmap_queues_action action,
814 				   u64 gpu_addr, u64 seq)
815 {
816 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
817 
818 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
819 	amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
820 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
821 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
822 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
823 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
824 	amdgpu_ring_write(kiq_ring,
825 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
826 
827 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
828 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
829 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
830 		amdgpu_ring_write(kiq_ring, seq);
831 	} else {
832 		amdgpu_ring_write(kiq_ring, 0);
833 		amdgpu_ring_write(kiq_ring, 0);
834 		amdgpu_ring_write(kiq_ring, 0);
835 	}
836 }
837 
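/* Query a queue's status through the KIQ */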
838 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
839 				   struct amdgpu_ring *ring,
840 				   u64 addr,
841 				   u64 seq)
842 {
843 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
844 
845 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
846 	amdgpu_ring_write(kiq_ring,
847 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
848 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
849 			  PACKET3_QUERY_STATUS_COMMAND(2));
850 	/* doorbell offset and engine select */
851 	amdgpu_ring_write(kiq_ring,
852 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
853 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
854 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
855 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
856 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
857 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
858 }
859 
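/* Flush the GPU TLB entries of @pasid through the KIQ (optionally on all VM hubs) */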
860 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
861 				uint16_t pasid, uint32_t flush_type,
862 				bool all_hub)
863 {
864 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
865 	amdgpu_ring_write(kiq_ring,
866 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
867 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
868 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
869 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
870 }
871 
872 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
873 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
874 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
875 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
876 	.kiq_query_status = gfx_v9_0_kiq_query_status,
877 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
878 	.set_resources_size = 8,
879 	.map_queues_size = 7,
880 	.unmap_queues_size = 6,
881 	.query_status_size = 7,
882 	.invalidate_tlbs_size = 2,
883 };
884 
885 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
886 {
887 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
888 }
889 
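/* Program the per-ASIC golden register settings, selected by GC IP version */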
890 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
891 {
892 	switch (adev->ip_versions[GC_HWIP][0]) {
893 	case IP_VERSION(9, 0, 1):
894 		soc15_program_register_sequence(adev,
895 						golden_settings_gc_9_0,
896 						ARRAY_SIZE(golden_settings_gc_9_0));
897 		soc15_program_register_sequence(adev,
898 						golden_settings_gc_9_0_vg10,
899 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
900 		break;
901 	case IP_VERSION(9, 2, 1):
902 		soc15_program_register_sequence(adev,
903 						golden_settings_gc_9_2_1,
904 						ARRAY_SIZE(golden_settings_gc_9_2_1));
905 		soc15_program_register_sequence(adev,
906 						golden_settings_gc_9_2_1_vg12,
907 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
908 		break;
909 	case IP_VERSION(9, 4, 0):
910 		soc15_program_register_sequence(adev,
911 						golden_settings_gc_9_0,
912 						ARRAY_SIZE(golden_settings_gc_9_0));
913 		soc15_program_register_sequence(adev,
914 						golden_settings_gc_9_0_vg20,
915 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
916 		break;
917 	case IP_VERSION(9, 4, 1):
918 		soc15_program_register_sequence(adev,
919 						golden_settings_gc_9_4_1_arct,
920 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
921 		break;
922 	case IP_VERSION(9, 2, 2):
923 	case IP_VERSION(9, 1, 0):
924 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
925 						ARRAY_SIZE(golden_settings_gc_9_1));
926 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
927 			soc15_program_register_sequence(adev,
928 							golden_settings_gc_9_1_rv2,
929 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
930 		else
931 			soc15_program_register_sequence(adev,
932 							golden_settings_gc_9_1_rv1,
933 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
934 		break;
935 	case IP_VERSION(9, 3, 0):
936 		soc15_program_register_sequence(adev,
937 						golden_settings_gc_9_1_rn,
938 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
939 		return; /* for renoir, the common golden settings are not needed */
940 	case IP_VERSION(9, 4, 2):
941 		gfx_v9_4_2_init_golden_registers(adev,
942 						 adev->smuio.funcs->get_die_id(adev));
943 		break;
944 	default:
945 		break;
946 	}
947 
948 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
949 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
950 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
951 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
952 }
953 
954 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
955 {
956 	adev->gfx.scratch.num_reg = 8;
957 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
958 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
959 }
960 
961 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
962 				       bool wc, uint32_t reg, uint32_t val)
963 {
964 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
965 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
966 				WRITE_DATA_DST_SEL(0) |
967 				(wc ? WR_CONFIRM : 0));
968 	amdgpu_ring_write(ring, reg);
969 	amdgpu_ring_write(ring, 0);
970 	amdgpu_ring_write(ring, val);
971 }
972 
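/* Emit a WAIT_REG_MEM packet that polls a register or memory dword until it equals @ref under @mask */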
973 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
974 				  int mem_space, int opt, uint32_t addr0,
975 				  uint32_t addr1, uint32_t ref, uint32_t mask,
976 				  uint32_t inv)
977 {
978 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
979 	amdgpu_ring_write(ring,
980 				 /* memory (1) or register (0) */
981 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
982 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
983 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
984 				 WAIT_REG_MEM_ENGINE(eng_sel)));
985 
986 	if (mem_space)
987 		BUG_ON(addr0 & 0x3); /* Dword align */
988 	amdgpu_ring_write(ring, addr0);
989 	amdgpu_ring_write(ring, addr1);
990 	amdgpu_ring_write(ring, ref);
991 	amdgpu_ring_write(ring, mask);
992 	amdgpu_ring_write(ring, inv); /* poll interval */
993 }
994 
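/* Basic ring test: write a magic value to a scratch register via the ring and poll it back */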
995 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
996 {
997 	struct amdgpu_device *adev = ring->adev;
998 	uint32_t scratch;
999 	uint32_t tmp = 0;
1000 	unsigned i;
1001 	int r;
1002 
1003 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1004 	if (r)
1005 		return r;
1006 
1007 	WREG32(scratch, 0xCAFEDEAD);
1008 	r = amdgpu_ring_alloc(ring, 3);
1009 	if (r)
1010 		goto error_free_scratch;
1011 
1012 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1013 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1014 	amdgpu_ring_write(ring, 0xDEADBEEF);
1015 	amdgpu_ring_commit(ring);
1016 
1017 	for (i = 0; i < adev->usec_timeout; i++) {
1018 		tmp = RREG32(scratch);
1019 		if (tmp == 0xDEADBEEF)
1020 			break;
1021 		udelay(1);
1022 	}
1023 
1024 	if (i >= adev->usec_timeout)
1025 		r = -ETIMEDOUT;
1026 
1027 error_free_scratch:
1028 	amdgpu_gfx_scratch_free(adev, scratch);
1029 	return r;
1030 }
1031 
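/* IB test: submit an IB that writes a magic value to a writeback slot, then wait for its fence */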
1032 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1033 {
1034 	struct amdgpu_device *adev = ring->adev;
1035 	struct amdgpu_ib ib;
1036 	struct dma_fence *f = NULL;
1037 
1038 	unsigned index;
1039 	uint64_t gpu_addr;
1040 	uint32_t tmp;
1041 	long r;
1042 
1043 	r = amdgpu_device_wb_get(adev, &index);
1044 	if (r)
1045 		return r;
1046 
1047 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1048 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1049 	memset(&ib, 0, sizeof(ib));
1050 	r = amdgpu_ib_get(adev, NULL, 16,
1051 					AMDGPU_IB_POOL_DIRECT, &ib);
1052 	if (r)
1053 		goto err1;
1054 
1055 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1056 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1057 	ib.ptr[2] = lower_32_bits(gpu_addr);
1058 	ib.ptr[3] = upper_32_bits(gpu_addr);
1059 	ib.ptr[4] = 0xDEADBEEF;
1060 	ib.length_dw = 5;
1061 
1062 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1063 	if (r)
1064 		goto err2;
1065 
1066 	r = dma_fence_wait_timeout(f, false, timeout);
1067 	if (r == 0) {
1068 		r = -ETIMEDOUT;
1069 		goto err2;
1070 	} else if (r < 0) {
1071 		goto err2;
1072 	}
1073 
1074 	tmp = adev->wb.wb[index];
1075 	if (tmp == 0xDEADBEEF)
1076 		r = 0;
1077 	else
1078 		r = -EINVAL;
1079 
1080 err2:
1081 	amdgpu_ib_free(adev, &ib, NULL);
1082 	dma_fence_put(f);
1083 err1:
1084 	amdgpu_device_wb_free(adev, index);
1085 	return r;
1086 }
1087 
1088 
1089 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1090 {
1091 	release_firmware(adev->gfx.pfp_fw);
1092 	adev->gfx.pfp_fw = NULL;
1093 	release_firmware(adev->gfx.me_fw);
1094 	adev->gfx.me_fw = NULL;
1095 	release_firmware(adev->gfx.ce_fw);
1096 	adev->gfx.ce_fw = NULL;
1097 	release_firmware(adev->gfx.rlc_fw);
1098 	adev->gfx.rlc_fw = NULL;
1099 	release_firmware(adev->gfx.mec_fw);
1100 	adev->gfx.mec_fw = NULL;
1101 	release_firmware(adev->gfx.mec2_fw);
1102 	adev->gfx.mec2_fw = NULL;
1103 
1104 	kfree(adev->gfx.rlc.register_list_format);
1105 }
1106 
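/* Pull the save/restore list (cntl/gpm/srm) blobs and versions out of the v2.1 RLC firmware header */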
1107 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1108 {
1109 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1110 
1111 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1112 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1113 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1114 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1115 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1116 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1117 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1118 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1119 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1120 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1121 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1122 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1123 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1124 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1125 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1126 }
1127 
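/* Set the me/mec fw_write_wait flags based on minimum CP firmware versions per IP version */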
1128 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1129 {
1130 	adev->gfx.me_fw_write_wait = false;
1131 	adev->gfx.mec_fw_write_wait = false;
1132 
1133 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1134 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1135 	    (adev->gfx.mec_feature_version < 46) ||
1136 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1137 	    (adev->gfx.pfp_feature_version < 46)))
1138 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1139 
1140 	switch (adev->ip_versions[GC_HWIP][0]) {
1141 	case IP_VERSION(9, 0, 1):
1142 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1143 		    (adev->gfx.me_feature_version >= 42) &&
1144 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1145 		    (adev->gfx.pfp_feature_version >= 42))
1146 			adev->gfx.me_fw_write_wait = true;
1147 
1148 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1149 		    (adev->gfx.mec_feature_version >= 42))
1150 			adev->gfx.mec_fw_write_wait = true;
1151 		break;
1152 	case IP_VERSION(9, 2, 1):
1153 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1154 		    (adev->gfx.me_feature_version >= 44) &&
1155 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1156 		    (adev->gfx.pfp_feature_version >= 44))
1157 			adev->gfx.me_fw_write_wait = true;
1158 
1159 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1160 		    (adev->gfx.mec_feature_version >= 44))
1161 			adev->gfx.mec_fw_write_wait = true;
1162 		break;
1163 	case IP_VERSION(9, 4, 0):
1164 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1165 		    (adev->gfx.me_feature_version >= 44) &&
1166 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1167 		    (adev->gfx.pfp_feature_version >= 44))
1168 			adev->gfx.me_fw_write_wait = true;
1169 
1170 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1171 		    (adev->gfx.mec_feature_version >= 44))
1172 			adev->gfx.mec_fw_write_wait = true;
1173 		break;
1174 	case IP_VERSION(9, 1, 0):
1175 	case IP_VERSION(9, 2, 2):
1176 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1177 		    (adev->gfx.me_feature_version >= 42) &&
1178 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1179 		    (adev->gfx.pfp_feature_version >= 42))
1180 			adev->gfx.me_fw_write_wait = true;
1181 
1182 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1183 		    (adev->gfx.mec_feature_version >= 42))
1184 			adev->gfx.mec_fw_write_wait = true;
1185 		break;
1186 	default:
1187 		adev->gfx.me_fw_write_wait = true;
1188 		adev->gfx.mec_fw_write_wait = true;
1189 		break;
1190 	}
1191 }
1192 
1193 struct amdgpu_gfxoff_quirk {
1194 	u16 chip_vendor;
1195 	u16 chip_device;
1196 	u16 subsys_vendor;
1197 	u16 subsys_device;
1198 	u8 revision;
1199 };
1200 
1201 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1202 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1203 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1204 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1205 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1206 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1207 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1208 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1209 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1210 	{ 0, 0, 0, 0, 0 },
1211 };
1212 
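/* Return true if the PCI IDs and revision match an entry in the GFXOFF quirk list above. */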
1213 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1214 {
1215 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1216 
1217 	while (p && p->chip_device != 0) {
1218 		if (pdev->vendor == p->chip_vendor &&
1219 		    pdev->device == p->chip_device &&
1220 		    pdev->subsystem_vendor == p->subsys_vendor &&
1221 		    pdev->subsystem_device == p->subsys_device &&
1222 		    pdev->revision == p->revision) {
1223 			return true;
1224 		}
1225 		++p;
1226 	}
1227 	return false;
1228 }
1229 
1230 static bool is_raven_kicker(struct amdgpu_device *adev)
1231 {
1232 	if (adev->pm.fw_version >= 0x41e2b)
1233 		return true;
1234 	else
1235 		return false;
1236 }
1237 
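/* Return true when GC 9.3.0 runs ME firmware new enough to use the enlarged CP doorbell range. */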
1238 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1239 {
1240 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1241 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1242 	    (adev->gfx.me_feature_version >= 52))
1243 		return true;
1244 	else
1245 		return false;
1246 }
1247 
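/*
 * Apply GFXOFF policy: disable it for quirked boards and for original Raven
 * parts whose RLC firmware is too old, and set the powergating flags GFXOFF
 * relies on for the APUs that keep it enabled.
 */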
1248 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1249 {
1250 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1251 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1252 
1253 	switch (adev->ip_versions[GC_HWIP][0]) {
1254 	case IP_VERSION(9, 0, 1):
1255 	case IP_VERSION(9, 2, 1):
1256 	case IP_VERSION(9, 4, 0):
1257 		break;
1258 	case IP_VERSION(9, 2, 2):
1259 	case IP_VERSION(9, 1, 0):
1260 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1261 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1262 		    ((!is_raven_kicker(adev) &&
1263 		      adev->gfx.rlc_fw_version < 531) ||
1264 		     (adev->gfx.rlc_feature_version < 1) ||
1265 		     !adev->gfx.rlc.is_rlc_v2_1))
1266 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1267 
1268 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1269 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1270 				AMD_PG_SUPPORT_CP |
1271 				AMD_PG_SUPPORT_RLC_SMU_HS;
1272 		break;
1273 	case IP_VERSION(9, 3, 0):
1274 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1275 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1276 				AMD_PG_SUPPORT_CP |
1277 				AMD_PG_SUPPORT_RLC_SMU_HS;
1278 		break;
1279 	default:
1280 		break;
1281 	}
1282 }
1283 
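/*
 * Request and validate the PFP, ME and CE microcode for the given chip and,
 * when the PSP front-door loader is in use, add each image to the firmware list.
 */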
1284 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1285 					  const char *chip_name)
1286 {
1287 	char fw_name[30];
1288 	int err;
1289 	struct amdgpu_firmware_info *info = NULL;
1290 	const struct common_firmware_header *header = NULL;
1291 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1292 
1293 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1294 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1295 	if (err)
1296 		goto out;
1297 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1298 	if (err)
1299 		goto out;
1300 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1301 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1302 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1303 
1304 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1305 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1306 	if (err)
1307 		goto out;
1308 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1309 	if (err)
1310 		goto out;
1311 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1312 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1313 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1314 
1315 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1316 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1317 	if (err)
1318 		goto out;
1319 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1320 	if (err)
1321 		goto out;
1322 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1323 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1324 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1325 
1326 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1327 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1328 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1329 		info->fw = adev->gfx.pfp_fw;
1330 		header = (const struct common_firmware_header *)info->fw->data;
1331 		adev->firmware.fw_size +=
1332 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1333 
1334 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1335 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1336 		info->fw = adev->gfx.me_fw;
1337 		header = (const struct common_firmware_header *)info->fw->data;
1338 		adev->firmware.fw_size +=
1339 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1340 
1341 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1342 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1343 		info->fw = adev->gfx.ce_fw;
1344 		header = (const struct common_firmware_header *)info->fw->data;
1345 		adev->firmware.fw_size +=
1346 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1347 	}
1348 
1349 out:
1350 	if (err) {
1351 		dev_err(adev->dev,
1352 			"gfx9: Failed to load firmware \"%s\"\n",
1353 			fw_name);
1354 		release_firmware(adev->gfx.pfp_fw);
1355 		adev->gfx.pfp_fw = NULL;
1356 		release_firmware(adev->gfx.me_fw);
1357 		adev->gfx.me_fw = NULL;
1358 		release_firmware(adev->gfx.ce_fw);
1359 		adev->gfx.ce_fw = NULL;
1360 	}
1361 	return err;
1362 }
1363 
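/*
 * Request the RLC microcode (selecting the AM4 or kicker variant where needed),
 * parse its header into the rlc bookkeeping fields and, for RLC v2.1 firmware,
 * register the save/restore lists for PSP loading.
 */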
1364 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1365 					  const char *chip_name)
1366 {
1367 	char fw_name[30];
1368 	int err;
1369 	struct amdgpu_firmware_info *info = NULL;
1370 	const struct common_firmware_header *header = NULL;
1371 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1372 	unsigned int *tmp = NULL;
1373 	unsigned int i = 0;
1374 	uint16_t version_major;
1375 	uint16_t version_minor;
1376 	uint32_t smu_version;
1377 
1378 	/*
1379 	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1380 	 * instead of picasso_rlc.bin.
1381 	 * Judgment method:
1382 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1383 	 *          or revision >= 0xD8 && revision <= 0xDF
1384 	 * otherwise it is PCO FP5
1385 	 */
1386 	if (!strcmp(chip_name, "picasso") &&
1387 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1388 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1389 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1390 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1391 		(smu_version >= 0x41e2b))
1392 		/*
1393 		 * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1394 		 */
1395 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1396 	else
1397 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1398 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1399 	if (err)
1400 		goto out;
1401 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1402 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1403 
1404 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1405 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1406 	if (version_major == 2 && version_minor == 1)
1407 		adev->gfx.rlc.is_rlc_v2_1 = true;
1408 
1409 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1410 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1411 	adev->gfx.rlc.save_and_restore_offset =
1412 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1413 	adev->gfx.rlc.clear_state_descriptor_offset =
1414 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1415 	adev->gfx.rlc.avail_scratch_ram_locations =
1416 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1417 	adev->gfx.rlc.reg_restore_list_size =
1418 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1419 	adev->gfx.rlc.reg_list_format_start =
1420 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1421 	adev->gfx.rlc.reg_list_format_separate_start =
1422 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1423 	adev->gfx.rlc.starting_offsets_start =
1424 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1425 	adev->gfx.rlc.reg_list_format_size_bytes =
1426 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1427 	adev->gfx.rlc.reg_list_size_bytes =
1428 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1429 	adev->gfx.rlc.register_list_format =
1430 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1431 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1432 	if (!adev->gfx.rlc.register_list_format) {
1433 		err = -ENOMEM;
1434 		goto out;
1435 	}
1436 
1437 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1438 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1439 	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1440 		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1441 
1442 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1443 
1444 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1445 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1446 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1447 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1448 
1449 	if (adev->gfx.rlc.is_rlc_v2_1)
1450 		gfx_v9_0_init_rlc_ext_microcode(adev);
1451 
1452 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1453 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1454 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1455 		info->fw = adev->gfx.rlc_fw;
1456 		header = (const struct common_firmware_header *)info->fw->data;
1457 		adev->firmware.fw_size +=
1458 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1459 
1460 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1461 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1462 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1463 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1464 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1465 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1466 			info->fw = adev->gfx.rlc_fw;
1467 			adev->firmware.fw_size +=
1468 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1469 
1470 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1471 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1472 			info->fw = adev->gfx.rlc_fw;
1473 			adev->firmware.fw_size +=
1474 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1475 
1476 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1477 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1478 			info->fw = adev->gfx.rlc_fw;
1479 			adev->firmware.fw_size +=
1480 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1481 		}
1482 	}
1483 
1484 out:
1485 	if (err) {
1486 		dev_err(adev->dev,
1487 			"gfx9: Failed to load firmware \"%s\"\n",
1488 			fw_name);
1489 		release_firmware(adev->gfx.rlc_fw);
1490 		adev->gfx.rlc_fw = NULL;
1491 	}
1492 	return err;
1493 }
1494 
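/* Arcturus, Renoir and Aldebaran reuse the MEC image for MEC2; the other ASICs ship a separate mec2 binary. */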
1495 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1496 {
1497 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1498 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1499 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1500 		return false;
1501 
1502 	return true;
1503 }
1504 
1505 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1506 					  const char *chip_name)
1507 {
1508 	char fw_name[30];
1509 	int err;
1510 	struct amdgpu_firmware_info *info = NULL;
1511 	const struct common_firmware_header *header = NULL;
1512 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1513 
1514 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1515 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1516 	if (err)
1517 		goto out;
1518 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1519 	if (err)
1520 		goto out;
1521 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1522 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1523 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1524 
1525 
1526 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1527 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1528 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1529 		if (!err) {
1530 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1531 			if (err)
1532 				goto out;
1533 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1534 				adev->gfx.mec2_fw->data;
1535 			adev->gfx.mec2_fw_version =
1536 				le32_to_cpu(cp_hdr->header.ucode_version);
1537 			adev->gfx.mec2_feature_version =
1538 				le32_to_cpu(cp_hdr->ucode_feature_version);
1539 		} else {
1540 			err = 0;
1541 			adev->gfx.mec2_fw = NULL;
1542 		}
1543 	} else {
1544 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1545 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1546 	}
1547 
1548 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1549 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1550 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1551 		info->fw = adev->gfx.mec_fw;
1552 		header = (const struct common_firmware_header *)info->fw->data;
1553 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1554 		adev->firmware.fw_size +=
1555 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1556 
1557 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1558 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1559 		info->fw = adev->gfx.mec_fw;
1560 		adev->firmware.fw_size +=
1561 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1562 
1563 		if (adev->gfx.mec2_fw) {
1564 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1565 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1566 			info->fw = adev->gfx.mec2_fw;
1567 			header = (const struct common_firmware_header *)info->fw->data;
1568 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1569 			adev->firmware.fw_size +=
1570 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1571 
1572 			/* TODO: Determine if MEC2 JT FW loading can be removed
1573 			 * for all GFX v9 ASICs and above. */
1574 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1575 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1576 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1577 				info->fw = adev->gfx.mec2_fw;
1578 				adev->firmware.fw_size +=
1579 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1580 					PAGE_SIZE);
1581 			}
1582 		}
1583 	}
1584 
1585 out:
1586 	gfx_v9_0_check_if_need_gfxoff(adev);
1587 	gfx_v9_0_check_fw_write_wait(adev);
1588 	if (err) {
1589 		dev_err(adev->dev,
1590 			"gfx9: Failed to load firmware \"%s\"\n",
1591 			fw_name);
1592 		release_firmware(adev->gfx.mec_fw);
1593 		adev->gfx.mec_fw = NULL;
1594 		release_firmware(adev->gfx.mec2_fw);
1595 		adev->gfx.mec2_fw = NULL;
1596 	}
1597 	return err;
1598 }
1599 
1600 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1601 {
1602 	const char *chip_name;
1603 	int r;
1604 
1605 	DRM_DEBUG("\n");
1606 
1607 	switch (adev->ip_versions[GC_HWIP][0]) {
1608 	case IP_VERSION(9, 0, 1):
1609 		chip_name = "vega10";
1610 		break;
1611 	case IP_VERSION(9, 2, 1):
1612 		chip_name = "vega12";
1613 		break;
1614 	case IP_VERSION(9, 4, 0):
1615 		chip_name = "vega20";
1616 		break;
1617 	case IP_VERSION(9, 2, 2):
1618 	case IP_VERSION(9, 1, 0):
1619 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1620 			chip_name = "raven2";
1621 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1622 			chip_name = "picasso";
1623 		else
1624 			chip_name = "raven";
1625 		break;
1626 	case IP_VERSION(9, 4, 1):
1627 		chip_name = "arcturus";
1628 		break;
1629 	case IP_VERSION(9, 3, 0):
1630 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1631 			chip_name = "renoir";
1632 		else
1633 			chip_name = "green_sardine";
1634 		break;
1635 	case IP_VERSION(9, 4, 2):
1636 		chip_name = "aldebaran";
1637 		break;
1638 	default:
1639 		BUG();
1640 	}
1641 
1642 	/* No CPG in Arcturus */
1643 	if (adev->gfx.num_gfx_rings) {
1644 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1645 		if (r)
1646 			return r;
1647 	}
1648 
1649 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1650 	if (r)
1651 		return r;
1652 
1653 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1654 	if (r)
1655 		return r;
1656 
1657 	return r;
1658 }
1659 
1660 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1661 {
1662 	u32 count = 0;
1663 	const struct cs_section_def *sect = NULL;
1664 	const struct cs_extent_def *ext = NULL;
1665 
1666 	/* begin clear state */
1667 	count += 2;
1668 	/* context control state */
1669 	count += 3;
1670 
1671 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1672 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1673 			if (sect->id == SECT_CONTEXT)
1674 				count += 2 + ext->reg_count;
1675 			else
1676 				return 0;
1677 		}
1678 	}
1679 
1680 	/* end clear state */
1681 	count += 2;
1682 	/* clear state */
1683 	count += 2;
1684 
1685 	return count;
1686 }
1687 
1688 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1689 				    volatile u32 *buffer)
1690 {
1691 	u32 count = 0, i;
1692 	const struct cs_section_def *sect = NULL;
1693 	const struct cs_extent_def *ext = NULL;
1694 
1695 	if (adev->gfx.rlc.cs_data == NULL)
1696 		return;
1697 	if (buffer == NULL)
1698 		return;
1699 
1700 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1701 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1702 
1703 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1704 	buffer[count++] = cpu_to_le32(0x80000000);
1705 	buffer[count++] = cpu_to_le32(0x80000000);
1706 
1707 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1708 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1709 			if (sect->id == SECT_CONTEXT) {
1710 				buffer[count++] =
1711 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1712 				buffer[count++] = cpu_to_le32(ext->reg_index -
1713 						PACKET3_SET_CONTEXT_REG_START);
1714 				for (i = 0; i < ext->reg_count; i++)
1715 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1716 			} else {
1717 				return;
1718 			}
1719 		}
1720 	}
1721 
1722 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1723 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1724 
1725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1726 	buffer[count++] = cpu_to_le32(0);
1727 }
1728 
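/*
 * Program the RLC always-on CU masks used by powergating: 4 CUs stay on for
 * APUs, 8 for Vega12 and 12 for the remaining parts.
 */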
1729 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1730 {
1731 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1732 	uint32_t pg_always_on_cu_num = 2;
1733 	uint32_t always_on_cu_num;
1734 	uint32_t i, j, k;
1735 	uint32_t mask, cu_bitmap, counter;
1736 
1737 	if (adev->flags & AMD_IS_APU)
1738 		always_on_cu_num = 4;
1739 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1740 		always_on_cu_num = 8;
1741 	else
1742 		always_on_cu_num = 12;
1743 
1744 	mutex_lock(&adev->grbm_idx_mutex);
1745 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1746 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1747 			mask = 1;
1748 			cu_bitmap = 0;
1749 			counter = 0;
1750 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1751 
1752 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1753 				if (cu_info->bitmap[i][j] & mask) {
1754 					if (counter == pg_always_on_cu_num)
1755 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1756 					if (counter < always_on_cu_num)
1757 						cu_bitmap |= mask;
1758 					else
1759 						break;
1760 					counter++;
1761 				}
1762 				mask <<= 1;
1763 			}
1764 
1765 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1766 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1767 		}
1768 	}
1769 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1770 	mutex_unlock(&adev->grbm_idx_mutex);
1771 }
1772 
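/* Configure load balancing per watt (LBPW) thresholds and counters for Raven-class APUs. */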
1773 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1774 {
1775 	uint32_t data;
1776 
1777 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1778 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1779 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1780 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1781 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1782 
1783 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1784 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1785 
1786 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1787 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1788 
1789 	mutex_lock(&adev->grbm_idx_mutex);
1790 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1791 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1792 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1793 
1794 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1795 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1796 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1797 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1798 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1799 
1800 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1801 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1802 	data &= 0x0000FFFF;
1803 	data |= 0x00C00000;
1804 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1805 
1806 	/*
1807 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1808 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1809 	 */
1810 
1811 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1812 	 * but is used for RLC_LB_CNTL configuration */
1813 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1814 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1815 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1816 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1817 	mutex_unlock(&adev->grbm_idx_mutex);
1818 
1819 	gfx_v9_0_init_always_on_cu_mask(adev);
1820 }
1821 
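/* Vega20 variant of the LBPW setup, using different threshold and counter values. */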
1822 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1823 {
1824 	uint32_t data;
1825 
1826 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1827 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1828 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1829 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1830 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1831 
1832 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1833 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1834 
1835 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1836 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1837 
1838 	mutex_lock(&adev->grbm_idx_mutex);
1839 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1840 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1841 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1842 
1843 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1844 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1845 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1846 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1847 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1848 
1849 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1850 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1851 	data &= 0x0000FFFF;
1852 	data |= 0x00C00000;
1853 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1854 
1855 	/*
1856 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1857 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1858 	 */
1859 
1860 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1861 	 * but is used for RLC_LB_CNTL configuration */
1862 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1863 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1864 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1865 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1866 	mutex_unlock(&adev->grbm_idx_mutex);
1867 
1868 	gfx_v9_0_init_always_on_cu_mask(adev);
1869 }
1870 
1871 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1872 {
1873 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1874 }
1875 
1876 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1877 {
1878 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1879 		return 5;
1880 	else
1881 		return 4;
1882 }
1883 
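/* Record the SCRATCH and GRBM register offsets used for RLC-gated (RLCG) register access and mark it supported. */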
1884 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1885 {
1886 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1887 
1888 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1889 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1890 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1891 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1892 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1893 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1894 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1895 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1896 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1897 }
1898 
1899 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1900 {
1901 	const struct cs_section_def *cs_data;
1902 	int r;
1903 
1904 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1905 
1906 	cs_data = adev->gfx.rlc.cs_data;
1907 
1908 	if (cs_data) {
1909 		/* init clear state block */
1910 		r = amdgpu_gfx_rlc_init_csb(adev);
1911 		if (r)
1912 			return r;
1913 	}
1914 
1915 	if (adev->flags & AMD_IS_APU) {
1916 		/* TODO: double check the cp_table_size for RV */
1917 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1918 		r = amdgpu_gfx_rlc_init_cpt(adev);
1919 		if (r)
1920 			return r;
1921 	}
1922 
1923 	switch (adev->ip_versions[GC_HWIP][0]) {
1924 	case IP_VERSION(9, 2, 2):
1925 	case IP_VERSION(9, 1, 0):
1926 		gfx_v9_0_init_lbpw(adev);
1927 		break;
1928 	case IP_VERSION(9, 4, 0):
1929 		gfx_v9_4_init_lbpw(adev);
1930 		break;
1931 	default:
1932 		break;
1933 	}
1934 
1935 	/* init spm vmid with 0xf */
1936 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1937 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1938 
1939 	return 0;
1940 }
1941 
1942 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1943 {
1944 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1945 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1946 }
1947 
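/*
 * Allocate the HPD EOP buffer in VRAM for the acquired compute queues and copy
 * the MEC microcode into a GTT buffer object.
 */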
1948 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1949 {
1950 	int r;
1951 	u32 *hpd;
1952 	const __le32 *fw_data;
1953 	unsigned fw_size;
1954 	u32 *fw;
1955 	size_t mec_hpd_size;
1956 
1957 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1958 
1959 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1960 
1961 	/* take ownership of the relevant compute queues */
1962 	amdgpu_gfx_compute_queue_acquire(adev);
1963 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1964 	if (mec_hpd_size) {
1965 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1966 					      AMDGPU_GEM_DOMAIN_VRAM,
1967 					      &adev->gfx.mec.hpd_eop_obj,
1968 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1969 					      (void **)&hpd);
1970 		if (r) {
1971 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1972 			gfx_v9_0_mec_fini(adev);
1973 			return r;
1974 		}
1975 
1976 		memset(hpd, 0, mec_hpd_size);
1977 
1978 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1979 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1980 	}
1981 
1982 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1983 
1984 	fw_data = (const __le32 *)
1985 		(adev->gfx.mec_fw->data +
1986 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1987 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1988 
1989 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1990 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1991 				      &adev->gfx.mec.mec_fw_obj,
1992 				      &adev->gfx.mec.mec_fw_gpu_addr,
1993 				      (void **)&fw);
1994 	if (r) {
1995 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1996 		gfx_v9_0_mec_fini(adev);
1997 		return r;
1998 	}
1999 
2000 	memcpy(fw, fw_data, fw_size);
2001 
2002 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2003 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2004 
2005 	return 0;
2006 }
2007 
2008 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2009 {
2010 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2011 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2012 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2013 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2014 		(SQ_IND_INDEX__FORCE_READ_MASK));
2015 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2016 }
2017 
2018 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2019 			   uint32_t wave, uint32_t thread,
2020 			   uint32_t regno, uint32_t num, uint32_t *out)
2021 {
2022 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2023 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2024 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2025 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2026 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2027 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2028 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2029 	while (num--)
2030 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2031 }
2032 
2033 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2034 {
2035 	/* type 1 wave data */
2036 	dst[(*no_fields)++] = 1;
2037 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2038 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2039 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2040 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2041 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2042 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2043 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2044 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2045 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2046 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2047 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2048 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2049 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2050 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2051 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2052 }
2053 
2054 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2055 				     uint32_t wave, uint32_t start,
2056 				     uint32_t size, uint32_t *dst)
2057 {
2058 	wave_read_regs(
2059 		adev, simd, wave, 0,
2060 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2061 }
2062 
2063 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2064 				     uint32_t wave, uint32_t thread,
2065 				     uint32_t start, uint32_t size,
2066 				     uint32_t *dst)
2067 {
2068 	wave_read_regs(
2069 		adev, simd, wave, thread,
2070 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2071 }
2072 
2073 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2074 				  u32 me, u32 pipe, u32 q, u32 vm)
2075 {
2076 	soc15_grbm_select(adev, me, pipe, q, vm);
2077 }
2078 
2079 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2080         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2081         .select_se_sh = &gfx_v9_0_select_se_sh,
2082         .read_wave_data = &gfx_v9_0_read_wave_data,
2083         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2084         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2085         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2086 };
2087 
2088 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2089 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2090 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2091 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2092 };
2093 
2094 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2095 	.ras_block = {
2096 		.hw_ops = &gfx_v9_0_ras_ops,
2097 	},
2098 };
2099 
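/*
 * Set the per-ASIC gfx configuration (FIFO sizes, GB_ADDR_CONFIG), register the
 * GFX RAS block where one is provided and decode the GB_ADDR_CONFIG fields.
 */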
2100 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2101 {
2102 	u32 gb_addr_config;
2103 	int err;
2104 
2105 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2106 
2107 	switch (adev->ip_versions[GC_HWIP][0]) {
2108 	case IP_VERSION(9, 0, 1):
2109 		adev->gfx.config.max_hw_contexts = 8;
2110 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2111 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2112 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2113 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2114 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2115 		break;
2116 	case IP_VERSION(9, 2, 1):
2117 		adev->gfx.config.max_hw_contexts = 8;
2118 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2119 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2120 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2121 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2122 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2123 		DRM_INFO("fix gfx.config for vega12\n");
2124 		break;
2125 	case IP_VERSION(9, 4, 0):
2126 		adev->gfx.ras = &gfx_v9_0_ras;
2127 		adev->gfx.config.max_hw_contexts = 8;
2128 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2129 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2130 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2131 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2132 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2133 		gb_addr_config &= ~0xf3e777ff;
2134 		gb_addr_config |= 0x22014042;
2135 		/* check vbios table if gpu info is not available */
2136 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2137 		if (err)
2138 			return err;
2139 		break;
2140 	case IP_VERSION(9, 2, 2):
2141 	case IP_VERSION(9, 1, 0):
2142 		adev->gfx.config.max_hw_contexts = 8;
2143 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2144 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2145 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2146 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2147 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2148 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2149 		else
2150 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2151 		break;
2152 	case IP_VERSION(9, 4, 1):
2153 		adev->gfx.ras = &gfx_v9_4_ras;
2154 		adev->gfx.config.max_hw_contexts = 8;
2155 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2156 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2157 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2158 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2159 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2160 		gb_addr_config &= ~0xf3e777ff;
2161 		gb_addr_config |= 0x22014042;
2162 		break;
2163 	case IP_VERSION(9, 3, 0):
2164 		adev->gfx.config.max_hw_contexts = 8;
2165 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2166 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2167 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2168 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2169 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2170 		gb_addr_config &= ~0xf3e777ff;
2171 		gb_addr_config |= 0x22010042;
2172 		break;
2173 	case IP_VERSION(9, 4, 2):
2174 		adev->gfx.ras = &gfx_v9_4_2_ras;
2175 		adev->gfx.config.max_hw_contexts = 8;
2176 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2177 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2178 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2179 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2180 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2181 		gb_addr_config &= ~0xf3e777ff;
2182 		gb_addr_config |= 0x22014042;
2183 		/* check vbios table if gpu info is not available */
2184 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2185 		if (err)
2186 			return err;
2187 		break;
2188 	default:
2189 		BUG();
2190 		break;
2191 	}
2192 
2193 	if (adev->gfx.ras) {
2194 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2195 		if (err) {
2196 			DRM_ERROR("Failed to register gfx ras block!\n");
2197 			return err;
2198 		}
2199 
2200 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2201 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2202 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2203 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2204 
2205 		/* If no special ras_late_init function is defined, use the default gfx ras_late_init */
2206 		if (!adev->gfx.ras->ras_block.ras_late_init)
2207 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2208 
2209 		/* If no special ras_cb function is defined, use the default ras_cb */
2210 		if (!adev->gfx.ras->ras_block.ras_cb)
2211 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2212 	}
2213 
2214 	adev->gfx.config.gb_addr_config = gb_addr_config;
2215 
2216 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2217 			REG_GET_FIELD(
2218 					adev->gfx.config.gb_addr_config,
2219 					GB_ADDR_CONFIG,
2220 					NUM_PIPES);
2221 
2222 	adev->gfx.config.max_tile_pipes =
2223 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2224 
2225 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2226 			REG_GET_FIELD(
2227 					adev->gfx.config.gb_addr_config,
2228 					GB_ADDR_CONFIG,
2229 					NUM_BANKS);
2230 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2231 			REG_GET_FIELD(
2232 					adev->gfx.config.gb_addr_config,
2233 					GB_ADDR_CONFIG,
2234 					MAX_COMPRESSED_FRAGS);
2235 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2236 			REG_GET_FIELD(
2237 					adev->gfx.config.gb_addr_config,
2238 					GB_ADDR_CONFIG,
2239 					NUM_RB_PER_SE);
2240 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2241 			REG_GET_FIELD(
2242 					adev->gfx.config.gb_addr_config,
2243 					GB_ADDR_CONFIG,
2244 					NUM_SHADER_ENGINES);
2245 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2246 			REG_GET_FIELD(
2247 					adev->gfx.config.gb_addr_config,
2248 					GB_ADDR_CONFIG,
2249 					PIPE_INTERLEAVE_SIZE));
2250 
2251 	return 0;
2252 }
2253 
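/*
 * Set up one compute ring on the given MEC/pipe/queue: assign its doorbell and
 * EOP buffer, pick the EOP interrupt source and ring priority, then init it.
 */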
2254 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2255 				      int mec, int pipe, int queue)
2256 {
2257 	unsigned irq_type;
2258 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2259 	unsigned int hw_prio;
2260 
2261 	ring = &adev->gfx.compute_ring[ring_id];
2262 
2263 	/* mec0 is me1 */
2264 	ring->me = mec + 1;
2265 	ring->pipe = pipe;
2266 	ring->queue = queue;
2267 
2268 	ring->ring_obj = NULL;
2269 	ring->use_doorbell = true;
2270 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2271 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2272 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2273 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2274 
2275 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2276 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2277 		+ ring->pipe;
2278 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2279 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2280 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2281 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2282 				hw_prio, NULL);
2283 }
2284 
2285 static int gfx_v9_0_sw_init(void *handle)
2286 {
2287 	int i, j, k, r, ring_id;
2288 	struct amdgpu_ring *ring;
2289 	struct amdgpu_kiq *kiq;
2290 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2291 
2292 	switch (adev->ip_versions[GC_HWIP][0]) {
2293 	case IP_VERSION(9, 0, 1):
2294 	case IP_VERSION(9, 2, 1):
2295 	case IP_VERSION(9, 4, 0):
2296 	case IP_VERSION(9, 2, 2):
2297 	case IP_VERSION(9, 1, 0):
2298 	case IP_VERSION(9, 4, 1):
2299 	case IP_VERSION(9, 3, 0):
2300 	case IP_VERSION(9, 4, 2):
2301 		adev->gfx.mec.num_mec = 2;
2302 		break;
2303 	default:
2304 		adev->gfx.mec.num_mec = 1;
2305 		break;
2306 	}
2307 
2308 	adev->gfx.mec.num_pipe_per_mec = 4;
2309 	adev->gfx.mec.num_queue_per_pipe = 8;
2310 
2311 	/* EOP Event */
2312 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2313 	if (r)
2314 		return r;
2315 
2316 	/* Privileged reg */
2317 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2318 			      &adev->gfx.priv_reg_irq);
2319 	if (r)
2320 		return r;
2321 
2322 	/* Privileged inst */
2323 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2324 			      &adev->gfx.priv_inst_irq);
2325 	if (r)
2326 		return r;
2327 
2328 	/* ECC error */
2329 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2330 			      &adev->gfx.cp_ecc_error_irq);
2331 	if (r)
2332 		return r;
2333 
2334 	/* FUE error */
2335 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2336 			      &adev->gfx.cp_ecc_error_irq);
2337 	if (r)
2338 		return r;
2339 
2340 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2341 
2342 	gfx_v9_0_scratch_init(adev);
2343 
2344 	r = gfx_v9_0_init_microcode(adev);
2345 	if (r) {
2346 		DRM_ERROR("Failed to load gfx firmware!\n");
2347 		return r;
2348 	}
2349 
2350 	if (adev->gfx.rlc.funcs) {
2351 		if (adev->gfx.rlc.funcs->init) {
2352 			r = adev->gfx.rlc.funcs->init(adev);
2353 			if (r) {
2354 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2355 				return r;
2356 			}
2357 		}
2358 	}
2359 
2360 	r = gfx_v9_0_mec_init(adev);
2361 	if (r) {
2362 		DRM_ERROR("Failed to init MEC BOs!\n");
2363 		return r;
2364 	}
2365 
2366 	/* set up the gfx ring */
2367 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2368 		ring = &adev->gfx.gfx_ring[i];
2369 		ring->ring_obj = NULL;
2370 		if (!i)
2371 			sprintf(ring->name, "gfx");
2372 		else
2373 			sprintf(ring->name, "gfx_%d", i);
2374 		ring->use_doorbell = true;
2375 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2376 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2377 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2378 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2379 		if (r)
2380 			return r;
2381 	}
2382 
2383 	/* set up the compute queues - allocate horizontally across pipes */
2384 	ring_id = 0;
2385 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2386 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2387 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2388 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2389 					continue;
2390 
2391 				r = gfx_v9_0_compute_ring_init(adev,
2392 							       ring_id,
2393 							       i, k, j);
2394 				if (r)
2395 					return r;
2396 
2397 				ring_id++;
2398 			}
2399 		}
2400 	}
2401 
2402 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2403 	if (r) {
2404 		DRM_ERROR("Failed to init KIQ BOs!\n");
2405 		return r;
2406 	}
2407 
2408 	kiq = &adev->gfx.kiq;
2409 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2410 	if (r)
2411 		return r;
2412 
2413 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2414 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2415 	if (r)
2416 		return r;
2417 
2418 	adev->gfx.ce_ram_size = 0x8000;
2419 
2420 	r = gfx_v9_0_gpu_early_init(adev);
2421 	if (r)
2422 		return r;
2423 
2424 	return 0;
2425 }
2426 
2427 
2428 static int gfx_v9_0_sw_fini(void *handle)
2429 {
2430 	int i;
2431 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2432 
2433 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2434 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2435 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2436 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2437 
2438 	amdgpu_gfx_mqd_sw_fini(adev);
2439 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2440 	amdgpu_gfx_kiq_fini(adev);
2441 
2442 	gfx_v9_0_mec_fini(adev);
2443 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2444 				&adev->gfx.rlc.clear_state_gpu_addr,
2445 				(void **)&adev->gfx.rlc.cs_ptr);
2446 	if (adev->flags & AMD_IS_APU) {
2447 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2448 				&adev->gfx.rlc.cp_table_gpu_addr,
2449 				(void **)&adev->gfx.rlc.cp_table_ptr);
2450 	}
2451 	gfx_v9_0_free_microcode(adev);
2452 
2453 	return 0;
2454 }
2455 
2456 
2457 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2458 {
2459 	/* TODO */
2460 }
2461 
2462 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2463 			   u32 instance)
2464 {
2465 	u32 data;
2466 
2467 	if (instance == 0xffffffff)
2468 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2469 	else
2470 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2471 
2472 	if (se_num == 0xffffffff)
2473 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2474 	else
2475 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2476 
2477 	if (sh_num == 0xffffffff)
2478 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2479 	else
2480 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2481 
2482 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2483 }
2484 
2485 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2486 {
2487 	u32 data, mask;
2488 
2489 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2490 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2491 
2492 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2493 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2494 
2495 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2496 					 adev->gfx.config.max_sh_per_se);
2497 
2498 	return (~data) & mask;
2499 }
2500 
2501 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2502 {
2503 	int i, j;
2504 	u32 data;
2505 	u32 active_rbs = 0;
2506 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2507 					adev->gfx.config.max_sh_per_se;
2508 
2509 	mutex_lock(&adev->grbm_idx_mutex);
2510 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2511 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2512 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2513 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2514 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2515 					       rb_bitmap_width_per_sh);
2516 		}
2517 	}
2518 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2519 	mutex_unlock(&adev->grbm_idx_mutex);
2520 
2521 	adev->gfx.config.backend_enable_mask = active_rbs;
2522 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2523 }
2524 
2525 #define DEFAULT_SH_MEM_BASES	(0x6000)
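/*
 * Program the SH_MEM config and aperture bases for every KFD compute VMID and
 * clear their GDS, GWS and OA allocations.
 */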
2526 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2527 {
2528 	int i;
2529 	uint32_t sh_mem_config;
2530 	uint32_t sh_mem_bases;
2531 
2532 	/*
2533 	 * Configure apertures:
2534 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2535 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2536 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2537 	 */
2538 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2539 
2540 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2541 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2542 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2543 
2544 	mutex_lock(&adev->srbm_mutex);
2545 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2546 		soc15_grbm_select(adev, 0, 0, 0, i);
2547 		/* CP and shaders */
2548 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2549 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2550 	}
2551 	soc15_grbm_select(adev, 0, 0, 0, 0);
2552 	mutex_unlock(&adev->srbm_mutex);
2553 
2554 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2555 	 * access. These should be enabled by FW for target VMIDs. */
2556 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2557 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2558 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2559 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2560 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2561 	}
2562 }
2563 
2564 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2565 {
2566 	int vmid;
2567 
2568 	/*
2569 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2570 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2571 	 * the driver can enable them for graphics. VMID0 should maintain
2572 	 * access so that HWS firmware can save/restore entries.
2573 	 */
2574 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2575 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2576 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2577 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2578 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2579 	}
2580 }
2581 
2582 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2583 {
2584 	uint32_t tmp;
2585 
2586 	switch (adev->ip_versions[GC_HWIP][0]) {
2587 	case IP_VERSION(9, 4, 1):
2588 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2589 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2590 					DISABLE_BARRIER_WAITCNT, 1);
2591 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2592 		break;
2593 	default:
2594 		break;
2595 	}
2596 }
2597 
2598 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2599 {
2600 	u32 tmp;
2601 	int i;
2602 
2603 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2604 
2605 	gfx_v9_0_tiling_mode_table_init(adev);
2606 
2607 	gfx_v9_0_setup_rb(adev);
2608 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2609 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2610 
2611 	/* XXX SH_MEM regs */
2612 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2613 	mutex_lock(&adev->srbm_mutex);
2614 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2615 		soc15_grbm_select(adev, 0, 0, 0, i);
2616 		/* CP and shaders */
2617 		if (i == 0) {
2618 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2619 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2620 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2621 					    !!adev->gmc.noretry);
2622 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2623 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2624 		} else {
2625 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2626 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2627 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2628 					    !!adev->gmc.noretry);
2629 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2630 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2631 				(adev->gmc.private_aperture_start >> 48));
2632 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2633 				(adev->gmc.shared_aperture_start >> 48));
2634 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2635 		}
2636 	}
2637 	soc15_grbm_select(adev, 0, 0, 0, 0);
2638 
2639 	mutex_unlock(&adev->srbm_mutex);
2640 
2641 	gfx_v9_0_init_compute_vmid(adev);
2642 	gfx_v9_0_init_gds_vmid(adev);
2643 	gfx_v9_0_init_sq_config(adev);
2644 }
2645 
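/* Poll until the per-SE/SH RLC serdes CU masters and the non-CU masters report idle. */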
2646 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2647 {
2648 	u32 i, j, k;
2649 	u32 mask;
2650 
2651 	mutex_lock(&adev->grbm_idx_mutex);
2652 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2653 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2654 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2655 			for (k = 0; k < adev->usec_timeout; k++) {
2656 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2657 					break;
2658 				udelay(1);
2659 			}
2660 			if (k == adev->usec_timeout) {
2661 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2662 						      0xffffffff, 0xffffffff);
2663 				mutex_unlock(&adev->grbm_idx_mutex);
2664 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2665 					 i, j);
2666 				return;
2667 			}
2668 		}
2669 	}
2670 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2671 	mutex_unlock(&adev->grbm_idx_mutex);
2672 
2673 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2674 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2675 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2676 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2677 	for (k = 0; k < adev->usec_timeout; k++) {
2678 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2679 			break;
2680 		udelay(1);
2681 	}
2682 }
2683 
2684 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2685 					       bool enable)
2686 {
2687 	u32 tmp;
2688 
2689 	/* These interrupts should be enabled to drive DS clock */
2690 
2691 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2692 
2693 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2694 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2695 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2696 	if (adev->gfx.num_gfx_rings)
2697 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2698 
2699 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2700 }
2701 
2702 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2703 {
2704 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2705 	/* csib */
2706 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2707 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2708 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2709 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2710 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2711 			adev->gfx.rlc.clear_state_size);
2712 }
2713 
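/*
 * Walk the RLC register-list format array, recording where each indirect run
 * starts and collecting the unique indirect register offsets it references.
 */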
2714 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2715 				int indirect_offset,
2716 				int list_size,
2717 				int *unique_indirect_regs,
2718 				int unique_indirect_reg_count,
2719 				int *indirect_start_offsets,
2720 				int *indirect_start_offsets_count,
2721 				int max_start_offsets_count)
2722 {
2723 	int idx;
2724 
2725 	for (; indirect_offset < list_size; indirect_offset++) {
2726 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2727 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2728 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2729 
2730 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2731 			indirect_offset += 2;
2732 
2733 			/* look for the matching index */
2734 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2735 				if (unique_indirect_regs[idx] ==
2736 					register_list_format[indirect_offset] ||
2737 					!unique_indirect_regs[idx])
2738 					break;
2739 			}
2740 
2741 			BUG_ON(idx >= unique_indirect_reg_count);
2742 
2743 			if (!unique_indirect_regs[idx])
2744 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2745 
2746 			indirect_offset++;
2747 		}
2748 	}
2749 }
2750 
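/*
 * Load the RLC save/restore machinery from the firmware register lists: the
 * restore table goes into SRM ARAM, the format list into GPM scratch, and the
 * unique indirect registers into the SRM index-control slots.
 */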
2751 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2752 {
2753 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2754 	int unique_indirect_reg_count = 0;
2755 
2756 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2757 	int indirect_start_offsets_count = 0;
2758 
2759 	int list_size = 0;
2760 	int i = 0, j = 0;
2761 	u32 tmp = 0;
2762 
2763 	u32 *register_list_format =
2764 		kmemdup(adev->gfx.rlc.register_list_format,
2765 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2766 	if (!register_list_format)
2767 		return -ENOMEM;
2768 
2769 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2770 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2771 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2772 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2773 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2774 				    unique_indirect_regs,
2775 				    unique_indirect_reg_count,
2776 				    indirect_start_offsets,
2777 				    &indirect_start_offsets_count,
2778 				    ARRAY_SIZE(indirect_start_offsets));
2779 
2780 	/* enable auto inc in case it is disabled */
2781 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2782 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2783 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2784 
2785 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2786 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2787 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2788 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2789 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2790 			adev->gfx.rlc.register_restore[i]);
2791 
2792 	/* load indirect register */
2793 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2794 		adev->gfx.rlc.reg_list_format_start);
2795 
2796 	/* direct register portion */
2797 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2798 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2799 			register_list_format[i]);
2800 
2801 	/* indirect register portion */
2802 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2803 		if (register_list_format[i] == 0xFFFFFFFF) {
2804 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2805 			continue;
2806 		}
2807 
2808 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2809 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2810 
2811 		for (j = 0; j < unique_indirect_reg_count; j++) {
2812 			if (register_list_format[i] == unique_indirect_regs[j]) {
2813 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2814 				break;
2815 			}
2816 		}
2817 
2818 		BUG_ON(j >= unique_indirect_reg_count);
2819 
2820 		i++;
2821 	}
2822 
2823 	/* set save/restore list size */
2824 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2825 	list_size = list_size >> 1;
2826 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2827 		adev->gfx.rlc.reg_restore_list_size);
2828 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2829 
2830 	/* write the starting offsets to RLC scratch ram */
2831 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2832 		adev->gfx.rlc.starting_offsets_start);
2833 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2834 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2835 		       indirect_start_offsets[i]);
2836 
2837 	/* load unique indirect regs */
2838 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2839 		if (unique_indirect_regs[i] != 0) {
2840 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2841 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2842 			       unique_indirect_regs[i] & 0x3FFFF);
2843 
2844 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2845 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2846 			       unique_indirect_regs[i] >> 20);
2847 		}
2848 	}
2849 
2850 	kfree(register_list_format);
2851 	return 0;
2852 }
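/*
 * Illustrative sketch (not part of the driver): how one unique indirect
 * register entry is split between the SRM index CNTL ADDR/DATA registers in
 * the loop above.  The entry value is hypothetical.
 *
 *   u32 entry = unique_indirect_regs[i];   // e.g. 0x12345678
 *   u32 addr_part = entry & 0x3FFFF;       // low 18 bits -> ..._INDEX_CNTL_ADDR_i
 *   u32 data_part = entry >> 20;           // upper bits  -> ..._INDEX_CNTL_DATA_i
 */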
2853 
2854 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2855 {
2856 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2857 }
2858 
2859 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2860 					     bool enable)
2861 {
2862 	uint32_t data = 0;
2863 	uint32_t default_data = 0;
2864 
2865 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2866 	if (enable) {
2867 		/* enable GFXIP control over CGPG */
2868 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2869 		if (default_data != data)
2870 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2871 
2872 		/* update status */
2873 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2874 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2875 		if (default_data != data)
2876 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2877 	} else {
2878 		/* disable GFXIP control over CGPG */
2879 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2880 		if (default_data != data)
2881 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2882 	}
2883 }
2884 
2885 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2886 {
2887 	uint32_t data = 0;
2888 
2889 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2890 			      AMD_PG_SUPPORT_GFX_SMG |
2891 			      AMD_PG_SUPPORT_GFX_DMG)) {
2892 		/* init IDLE_POLL_COUNT = 0x60 */
2893 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2894 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2895 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2896 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2897 
2898 		/* init RLC PG Delay */
2899 		data = 0;
2900 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2901 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2902 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2903 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2904 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2905 
2906 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2907 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2908 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2909 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2910 
2911 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2912 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2913 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2914 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2915 
2916 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2917 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2918 
2919 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2920 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2921 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2922 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2923 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2924 	}
2925 }
2926 
2927 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2928 						bool enable)
2929 {
2930 	uint32_t data = 0;
2931 	uint32_t default_data = 0;
2932 
2933 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2934 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2935 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2936 			     enable ? 1 : 0);
2937 	if (default_data != data)
2938 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2939 }
2940 
2941 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2942 						bool enable)
2943 {
2944 	uint32_t data = 0;
2945 	uint32_t default_data = 0;
2946 
2947 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2948 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2949 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2950 			     enable ? 1 : 0);
2951 	if (default_data != data)
2952 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2953 }
2954 
2955 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2956 					bool enable)
2957 {
2958 	uint32_t data = 0;
2959 	uint32_t default_data = 0;
2960 
2961 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2962 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2963 			     CP_PG_DISABLE,
2964 			     enable ? 0 : 1);
2965 	if (default_data != data)
2966 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2967 }
2968 
2969 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2970 						bool enable)
2971 {
2972 	uint32_t data, default_data;
2973 
2974 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2975 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2976 			     GFX_POWER_GATING_ENABLE,
2977 			     enable ? 1 : 0);
2978 	if (default_data != data)
2979 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2980 }
2981 
2982 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2983 						bool enable)
2984 {
2985 	uint32_t data, default_data;
2986 
2987 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2988 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2989 			     GFX_PIPELINE_PG_ENABLE,
2990 			     enable ? 1 : 0);
2991 	if (default_data != data)
2992 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2993 
2994 	if (!enable)
2995 		/* read any GFX register to wake up GFX */
2996 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2997 }
2998 
2999 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3000 						       bool enable)
3001 {
3002 	uint32_t data, default_data;
3003 
3004 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3005 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3006 			     STATIC_PER_CU_PG_ENABLE,
3007 			     enable ? 1 : 0);
3008 	if (default_data != data)
3009 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3010 }
3011 
3012 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3013 						bool enable)
3014 {
3015 	uint32_t data, default_data;
3016 
3017 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3018 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3019 			     DYN_PER_CU_PG_ENABLE,
3020 			     enable ? 1 : 0);
3021 	if (default_data != data)
3022 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3023 }
3024 
3025 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3026 {
3027 	gfx_v9_0_init_csb(adev);
3028 
3029 	/*
3030 	 * The RLC save/restore list is only available from RLC v2_1 onwards,
3031 	 * and it is required by the gfxoff feature.
3032 	 */
3033 	if (adev->gfx.rlc.is_rlc_v2_1) {
3034 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3035 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3036 			gfx_v9_1_init_rlc_save_restore_list(adev);
3037 		gfx_v9_0_enable_save_restore_machine(adev);
3038 	}
3039 
3040 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3041 			      AMD_PG_SUPPORT_GFX_SMG |
3042 			      AMD_PG_SUPPORT_GFX_DMG |
3043 			      AMD_PG_SUPPORT_CP |
3044 			      AMD_PG_SUPPORT_GDS |
3045 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3046 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3047 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3048 		gfx_v9_0_init_gfx_power_gating(adev);
3049 	}
3050 }
3051 
3052 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3053 {
3054 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3055 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3056 	gfx_v9_0_wait_for_rlc_serdes(adev);
3057 }
3058 
3059 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3060 {
3061 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3062 	udelay(50);
3063 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3064 	udelay(50);
3065 }
3066 
3067 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3068 {
3069 #ifdef AMDGPU_RLC_DEBUG_RETRY
3070 	u32 rlc_ucode_ver;
3071 #endif
3072 
3073 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3074 	udelay(50);
3075 
3076 	/* APUs such as carrizo enable the CP interrupt only after the CP is initialized */
3077 	if (!(adev->flags & AMD_IS_APU)) {
3078 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3079 		udelay(50);
3080 	}
3081 
3082 #ifdef AMDGPU_RLC_DEBUG_RETRY
3083 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3084 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3085 	if (rlc_ucode_ver == 0x108) {
3086 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3087 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3088 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3089 		 * default is 0x9C4 to create a 100us interval */
3090 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3091 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3092 		 * to disable the page fault retry interrupts, default is
3093 		 * 0x100 (256) */
3094 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3095 	}
3096 #endif
3097 }
3098 
3099 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3100 {
3101 	const struct rlc_firmware_header_v2_0 *hdr;
3102 	const __le32 *fw_data;
3103 	unsigned i, fw_size;
3104 
3105 	if (!adev->gfx.rlc_fw)
3106 		return -EINVAL;
3107 
3108 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3109 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3110 
3111 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3112 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3113 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3114 
3115 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3116 			RLCG_UCODE_LOADING_START_ADDRESS);
3117 	for (i = 0; i < fw_size; i++)
3118 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3119 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3120 
3121 	return 0;
3122 }
3123 
3124 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3125 {
3126 	int r;
3127 
3128 	if (amdgpu_sriov_vf(adev)) {
3129 		gfx_v9_0_init_csb(adev);
3130 		return 0;
3131 	}
3132 
3133 	adev->gfx.rlc.funcs->stop(adev);
3134 
3135 	/* disable CG */
3136 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3137 
3138 	gfx_v9_0_init_pg(adev);
3139 
3140 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3141 		/* legacy rlc firmware loading */
3142 		r = gfx_v9_0_rlc_load_microcode(adev);
3143 		if (r)
3144 			return r;
3145 	}
3146 
3147 	switch (adev->ip_versions[GC_HWIP][0]) {
3148 	case IP_VERSION(9, 2, 2):
3149 	case IP_VERSION(9, 1, 0):
3150 		if (amdgpu_lbpw == 0)
3151 			gfx_v9_0_enable_lbpw(adev, false);
3152 		else
3153 			gfx_v9_0_enable_lbpw(adev, true);
3154 		break;
3155 	case IP_VERSION(9, 4, 0):
3156 		if (amdgpu_lbpw > 0)
3157 			gfx_v9_0_enable_lbpw(adev, true);
3158 		else
3159 			gfx_v9_0_enable_lbpw(adev, false);
3160 		break;
3161 	default:
3162 		break;
3163 	}
3164 
3165 	adev->gfx.rlc.funcs->start(adev);
3166 
3167 	return 0;
3168 }
3169 
3170 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3171 {
3172 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3173 
3174 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3175 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3176 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3177 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3178 	udelay(50);
3179 }
3180 
3181 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3182 {
3183 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3184 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3185 	const struct gfx_firmware_header_v1_0 *me_hdr;
3186 	const __le32 *fw_data;
3187 	unsigned i, fw_size;
3188 
3189 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3190 		return -EINVAL;
3191 
3192 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3193 		adev->gfx.pfp_fw->data;
3194 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3195 		adev->gfx.ce_fw->data;
3196 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3197 		adev->gfx.me_fw->data;
3198 
3199 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3200 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3201 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3202 
3203 	gfx_v9_0_cp_gfx_enable(adev, false);
3204 
3205 	/* PFP */
3206 	fw_data = (const __le32 *)
3207 		(adev->gfx.pfp_fw->data +
3208 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3209 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3210 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3211 	for (i = 0; i < fw_size; i++)
3212 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3213 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3214 
3215 	/* CE */
3216 	fw_data = (const __le32 *)
3217 		(adev->gfx.ce_fw->data +
3218 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3219 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3220 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3221 	for (i = 0; i < fw_size; i++)
3222 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3223 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3224 
3225 	/* ME */
3226 	fw_data = (const __le32 *)
3227 		(adev->gfx.me_fw->data +
3228 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3229 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3230 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3231 	for (i = 0; i < fw_size; i++)
3232 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3233 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3234 
3235 	return 0;
3236 }
3237 
3238 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3239 {
3240 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3241 	const struct cs_section_def *sect = NULL;
3242 	const struct cs_extent_def *ext = NULL;
3243 	int r, i, tmp;
3244 
3245 	/* init the CP */
3246 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3247 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3248 
3249 	gfx_v9_0_cp_gfx_enable(adev, true);
3250 
3251 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3252 	if (r) {
3253 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3254 		return r;
3255 	}
3256 
3257 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3258 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3259 
3260 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3261 	amdgpu_ring_write(ring, 0x80000000);
3262 	amdgpu_ring_write(ring, 0x80000000);
3263 
3264 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3265 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3266 			if (sect->id == SECT_CONTEXT) {
3267 				amdgpu_ring_write(ring,
3268 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3269 					       ext->reg_count));
3270 				amdgpu_ring_write(ring,
3271 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3272 				for (i = 0; i < ext->reg_count; i++)
3273 					amdgpu_ring_write(ring, ext->extent[i]);
3274 			}
3275 		}
3276 	}
3277 
3278 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3279 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3280 
3281 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3282 	amdgpu_ring_write(ring, 0);
3283 
3284 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3285 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3286 	amdgpu_ring_write(ring, 0x8000);
3287 	amdgpu_ring_write(ring, 0x8000);
3288 
3289 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3290 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3291 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3292 	amdgpu_ring_write(ring, tmp);
3293 	amdgpu_ring_write(ring, 0);
3294 
3295 	amdgpu_ring_commit(ring);
3296 
3297 	return 0;
3298 }
3299 
3300 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3301 {
3302 	struct amdgpu_ring *ring;
3303 	u32 tmp;
3304 	u32 rb_bufsz;
3305 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3306 
3307 	/* Set the write pointer delay */
3308 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3309 
3310 	/* set the RB to use vmid 0 */
3311 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3312 
3313 	/* Set ring buffer size */
3314 	ring = &adev->gfx.gfx_ring[0];
3315 	rb_bufsz = order_base_2(ring->ring_size / 8);
3316 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3317 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3318 #ifdef __BIG_ENDIAN
3319 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3320 #endif
3321 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3322 
3323 	/* Initialize the ring buffer's write pointers */
3324 	ring->wptr = 0;
3325 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3326 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3327 
3328 	/* set the wb address whether it's enabled or not */
3329 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3330 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3331 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3332 
3333 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3334 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3335 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3336 
3337 	mdelay(1);
3338 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3339 
3340 	rb_addr = ring->gpu_addr >> 8;
3341 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3342 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3343 
3344 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3345 	if (ring->use_doorbell) {
3346 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3347 				    DOORBELL_OFFSET, ring->doorbell_index);
3348 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3349 				    DOORBELL_EN, 1);
3350 	} else {
3351 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3352 	}
3353 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3354 
3355 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3356 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3357 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3358 
3359 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3360 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3361 
3362 
3363 	/* start the ring */
3364 	gfx_v9_0_cp_gfx_start(adev);
3365 	ring->sched.ready = true;
3366 
3367 	return 0;
3368 }
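/*
 * Illustrative sketch (not part of the driver): the RB_BUFSZ encoding used in
 * gfx_v9_0_cp_gfx_resume() above, assuming the field holds log2 of the ring
 * size in 8-byte units.  The ring size is hypothetical.
 *
 *   ring->ring_size = 64 * 1024;                    // 64 KiB, for example
 *   rb_bufsz = order_base_2(ring->ring_size / 8);   // order_base_2(8192) = 13
 *   // RB_BUFSZ = 13  ->  2^13 * 8 bytes = 64 KiB
 *   // RB_BLKSZ = rb_bufsz - 2 = 11
 */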
3369 
3370 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3371 {
3372 	if (enable) {
3373 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3374 	} else {
3375 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3376 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3377 		adev->gfx.kiq.ring.sched.ready = false;
3378 	}
3379 	udelay(50);
3380 }
3381 
3382 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3383 {
3384 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3385 	const __le32 *fw_data;
3386 	unsigned i;
3387 	u32 tmp;
3388 
3389 	if (!adev->gfx.mec_fw)
3390 		return -EINVAL;
3391 
3392 	gfx_v9_0_cp_compute_enable(adev, false);
3393 
3394 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3395 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3396 
3397 	fw_data = (const __le32 *)
3398 		(adev->gfx.mec_fw->data +
3399 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3400 	tmp = 0;
3401 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3402 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3403 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3404 
3405 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3406 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3407 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3408 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3409 
3410 	/* MEC1 */
3411 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3412 			 mec_hdr->jt_offset);
3413 	for (i = 0; i < mec_hdr->jt_size; i++)
3414 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3415 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3416 
3417 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3418 			adev->gfx.mec_fw_version);
3419 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3420 
3421 	return 0;
3422 }
3423 
3424 /* KIQ functions */
3425 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3426 {
3427 	uint32_t tmp;
3428 	struct amdgpu_device *adev = ring->adev;
3429 
3430 	/* tell the RLC which queue is the KIQ */
3431 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3432 	tmp &= 0xffffff00;
3433 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3434 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3435 	tmp |= 0x80;
3436 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3437 }
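/*
 * Illustrative sketch (not part of the driver): the queue id written to
 * mmRLC_CP_SCHEDULERS by gfx_v9_0_kiq_setting() above.  The me/pipe/queue
 * values are hypothetical.
 *
 *   me = 1, pipe = 0, queue = 0
 *   id = (me << 5) | (pipe << 3) | queue;   // 0x20
 *   // first write : low byte = 0x20
 *   // second write: low byte = 0x20 | 0x80 (same id with the top bit set)
 */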
3438 
3439 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3440 {
3441 	struct amdgpu_device *adev = ring->adev;
3442 
3443 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3444 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3445 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3446 			mqd->cp_hqd_queue_priority =
3447 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3448 		}
3449 	}
3450 }
3451 
3452 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3453 {
3454 	struct amdgpu_device *adev = ring->adev;
3455 	struct v9_mqd *mqd = ring->mqd_ptr;
3456 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3457 	uint32_t tmp;
3458 
3459 	mqd->header = 0xC0310800;
3460 	mqd->compute_pipelinestat_enable = 0x00000001;
3461 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3462 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3463 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3464 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3465 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3466 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3467 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3468 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3469 	mqd->compute_misc_reserved = 0x00000003;
3470 
3471 	mqd->dynamic_cu_mask_addr_lo =
3472 		lower_32_bits(ring->mqd_gpu_addr
3473 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3474 	mqd->dynamic_cu_mask_addr_hi =
3475 		upper_32_bits(ring->mqd_gpu_addr
3476 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3477 
3478 	eop_base_addr = ring->eop_gpu_addr >> 8;
3479 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3480 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3481 
3482 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3483 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3484 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3485 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3486 
3487 	mqd->cp_hqd_eop_control = tmp;
3488 
3489 	/* enable doorbell? */
3490 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3491 
3492 	if (ring->use_doorbell) {
3493 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3494 				    DOORBELL_OFFSET, ring->doorbell_index);
3495 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3496 				    DOORBELL_EN, 1);
3497 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3498 				    DOORBELL_SOURCE, 0);
3499 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3500 				    DOORBELL_HIT, 0);
3501 	} else {
3502 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3503 					 DOORBELL_EN, 0);
3504 	}
3505 
3506 	mqd->cp_hqd_pq_doorbell_control = tmp;
3507 
3508 	/* disable the queue if it's active */
3509 	ring->wptr = 0;
3510 	mqd->cp_hqd_dequeue_request = 0;
3511 	mqd->cp_hqd_pq_rptr = 0;
3512 	mqd->cp_hqd_pq_wptr_lo = 0;
3513 	mqd->cp_hqd_pq_wptr_hi = 0;
3514 
3515 	/* set the pointer to the MQD */
3516 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3517 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3518 
3519 	/* set MQD vmid to 0 */
3520 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3521 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3522 	mqd->cp_mqd_control = tmp;
3523 
3524 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3525 	hqd_gpu_addr = ring->gpu_addr >> 8;
3526 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3527 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3528 
3529 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3530 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3531 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3532 			    (order_base_2(ring->ring_size / 4) - 1));
3533 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3534 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3535 #ifdef __BIG_ENDIAN
3536 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3537 #endif
3538 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3539 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3540 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3541 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3542 	mqd->cp_hqd_pq_control = tmp;
3543 
3544 	/* set the wb address whether it's enabled or not */
3545 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3546 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3547 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3548 		upper_32_bits(wb_gpu_addr) & 0xffff;
3549 
3550 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3551 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3552 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3553 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3554 
3555 	tmp = 0;
3556 	/* enable the doorbell if requested */
3557 	if (ring->use_doorbell) {
3558 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3559 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3560 				DOORBELL_OFFSET, ring->doorbell_index);
3561 
3562 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3563 					 DOORBELL_EN, 1);
3564 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3565 					 DOORBELL_SOURCE, 0);
3566 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3567 					 DOORBELL_HIT, 0);
3568 	}
3569 
3570 	mqd->cp_hqd_pq_doorbell_control = tmp;
3571 
3572 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3573 	ring->wptr = 0;
3574 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3575 
3576 	/* set the vmid for the queue */
3577 	mqd->cp_hqd_vmid = 0;
3578 
3579 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3580 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3581 	mqd->cp_hqd_persistent_state = tmp;
3582 
3583 	/* set MIN_IB_AVAIL_SIZE */
3584 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3585 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3586 	mqd->cp_hqd_ib_control = tmp;
3587 
3588 	/* set static priority for a queue/ring */
3589 	gfx_v9_0_mqd_set_priority(ring, mqd);
3590 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3591 
3592 	/* the map_queues packet does not need to activate the queue,
3593 	 * so only the KIQ needs to set this field.
3594 	 */
3595 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3596 		mqd->cp_hqd_active = 1;
3597 
3598 	return 0;
3599 }
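/*
 * Illustrative sketch (not part of the driver): the EOP_SIZE encoding from
 * gfx_v9_0_mqd_init() above.  Per the in-code comment, the register value
 * encodes 2^(EOP_SIZE+1) dwords.
 *
 *   GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords
 *   EOP_SIZE = order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1
 *            = order_base_2(1024) - 1 = 9
 *   // 2^(9+1) = 1024 dwords = 4096 bytes, matching the allocated EOP buffer
 */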
3600 
3601 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3602 {
3603 	struct amdgpu_device *adev = ring->adev;
3604 	struct v9_mqd *mqd = ring->mqd_ptr;
3605 	int j;
3606 
3607 	/* disable wptr polling */
3608 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3609 
3610 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3611 	       mqd->cp_hqd_eop_base_addr_lo);
3612 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3613 	       mqd->cp_hqd_eop_base_addr_hi);
3614 
3615 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3616 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3617 	       mqd->cp_hqd_eop_control);
3618 
3619 	/* enable doorbell? */
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3621 	       mqd->cp_hqd_pq_doorbell_control);
3622 
3623 	/* disable the queue if it's active */
3624 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3625 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3626 		for (j = 0; j < adev->usec_timeout; j++) {
3627 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3628 				break;
3629 			udelay(1);
3630 		}
3631 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3632 		       mqd->cp_hqd_dequeue_request);
3633 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3634 		       mqd->cp_hqd_pq_rptr);
3635 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3636 		       mqd->cp_hqd_pq_wptr_lo);
3637 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3638 		       mqd->cp_hqd_pq_wptr_hi);
3639 	}
3640 
3641 	/* set the pointer to the MQD */
3642 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3643 	       mqd->cp_mqd_base_addr_lo);
3644 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3645 	       mqd->cp_mqd_base_addr_hi);
3646 
3647 	/* set MQD vmid to 0 */
3648 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3649 	       mqd->cp_mqd_control);
3650 
3651 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3652 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3653 	       mqd->cp_hqd_pq_base_lo);
3654 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3655 	       mqd->cp_hqd_pq_base_hi);
3656 
3657 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3658 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3659 	       mqd->cp_hqd_pq_control);
3660 
3661 	/* set the wb address whether it's enabled or not */
3662 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3663 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3664 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3665 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3666 
3667 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3668 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3669 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3670 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3671 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3672 
3673 	/* enable the doorbell if requested */
3674 	if (ring->use_doorbell) {
3675 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3676 					(adev->doorbell_index.kiq * 2) << 2);
3677 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3678 		 * does not wake it up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3679 		 * around this issue; the change has to be kept in line with the
3680 		 * firmware update.
3681 		 */
3682 		if (check_if_enlarge_doorbell_range(adev))
3683 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3684 					(adev->doorbell.size - 4));
3685 		else
3686 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3687 					(adev->doorbell_index.userqueue_end * 2) << 2);
3688 	}
3689 
3690 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3691 	       mqd->cp_hqd_pq_doorbell_control);
3692 
3693 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3694 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3695 	       mqd->cp_hqd_pq_wptr_lo);
3696 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3697 	       mqd->cp_hqd_pq_wptr_hi);
3698 
3699 	/* set the vmid for the queue */
3700 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3701 
3702 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3703 	       mqd->cp_hqd_persistent_state);
3704 
3705 	/* activate the queue */
3706 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3707 	       mqd->cp_hqd_active);
3708 
3709 	if (ring->use_doorbell)
3710 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3711 
3712 	return 0;
3713 }
3714 
3715 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3716 {
3717 	struct amdgpu_device *adev = ring->adev;
3718 	int j;
3719 
3720 	/* disable the queue if it's active */
3721 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3722 
3723 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3724 
3725 		for (j = 0; j < adev->usec_timeout; j++) {
3726 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3727 				break;
3728 			udelay(1);
3729 		}
3730 
3731 		if (j == adev->usec_timeout) {
3732 			DRM_DEBUG("KIQ dequeue request failed.\n");
3733 
3734 			/* Manual disable if dequeue request times out */
3735 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3736 		}
3737 
3738 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3739 		      0);
3740 	}
3741 
3742 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3743 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3744 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3745 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3746 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3747 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3748 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3749 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3750 
3751 	return 0;
3752 }
3753 
3754 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3755 {
3756 	struct amdgpu_device *adev = ring->adev;
3757 	struct v9_mqd *mqd = ring->mqd_ptr;
3758 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3759 	struct v9_mqd *tmp_mqd;
3760 
3761 	gfx_v9_0_kiq_setting(ring);
3762 
3763 	/* The GPU could be in a bad state during probe and the driver may
3764 	 * trigger a reset after loading the SMU; in that case the MQD has not
3765 	 * been initialized yet and must be re-initialized here.
3766 	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3767 	 */
3768 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3769 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3770 		/* for the GPU_RESET case, reset the MQD to a clean state */
3771 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3772 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3773 
3774 		/* reset ring buffer */
3775 		ring->wptr = 0;
3776 		amdgpu_ring_clear_ring(ring);
3777 
3778 		mutex_lock(&adev->srbm_mutex);
3779 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3780 		gfx_v9_0_kiq_init_register(ring);
3781 		soc15_grbm_select(adev, 0, 0, 0, 0);
3782 		mutex_unlock(&adev->srbm_mutex);
3783 	} else {
3784 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3785 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3786 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3787 		mutex_lock(&adev->srbm_mutex);
3788 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3789 		gfx_v9_0_mqd_init(ring);
3790 		gfx_v9_0_kiq_init_register(ring);
3791 		soc15_grbm_select(adev, 0, 0, 0, 0);
3792 		mutex_unlock(&adev->srbm_mutex);
3793 
3794 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3795 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3796 	}
3797 
3798 	return 0;
3799 }
3800 
3801 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3802 {
3803 	struct amdgpu_device *adev = ring->adev;
3804 	struct v9_mqd *mqd = ring->mqd_ptr;
3805 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3806 	struct v9_mqd *tmp_mqd;
3807 
3808 	/* As with the KIQ init above, the driver needs to re-init the MQD if
3809 	 * mqd->cp_hqd_pq_control was never initialized.
3810 	 */
3811 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3812 
3813 	if (!tmp_mqd->cp_hqd_pq_control ||
3814 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3815 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3816 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3817 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3818 		mutex_lock(&adev->srbm_mutex);
3819 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3820 		gfx_v9_0_mqd_init(ring);
3821 		soc15_grbm_select(adev, 0, 0, 0, 0);
3822 		mutex_unlock(&adev->srbm_mutex);
3823 
3824 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3825 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3826 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3827 		/* reset MQD to a clean status */
3828 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3829 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3830 
3831 		/* reset ring buffer */
3832 		ring->wptr = 0;
3833 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3834 		amdgpu_ring_clear_ring(ring);
3835 	} else {
3836 		amdgpu_ring_clear_ring(ring);
3837 	}
3838 
3839 	return 0;
3840 }
3841 
3842 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3843 {
3844 	struct amdgpu_ring *ring;
3845 	int r;
3846 
3847 	ring = &adev->gfx.kiq.ring;
3848 
3849 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3850 	if (unlikely(r != 0))
3851 		return r;
3852 
3853 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3854 	if (unlikely(r != 0))
3855 		return r;
3856 
3857 	gfx_v9_0_kiq_init_queue(ring);
3858 	amdgpu_bo_kunmap(ring->mqd_obj);
3859 	ring->mqd_ptr = NULL;
3860 	amdgpu_bo_unreserve(ring->mqd_obj);
3861 	ring->sched.ready = true;
3862 	return 0;
3863 }
3864 
3865 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3866 {
3867 	struct amdgpu_ring *ring = NULL;
3868 	int r = 0, i;
3869 
3870 	gfx_v9_0_cp_compute_enable(adev, true);
3871 
3872 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3873 		ring = &adev->gfx.compute_ring[i];
3874 
3875 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3876 		if (unlikely(r != 0))
3877 			goto done;
3878 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3879 		if (!r) {
3880 			r = gfx_v9_0_kcq_init_queue(ring);
3881 			amdgpu_bo_kunmap(ring->mqd_obj);
3882 			ring->mqd_ptr = NULL;
3883 		}
3884 		amdgpu_bo_unreserve(ring->mqd_obj);
3885 		if (r)
3886 			goto done;
3887 	}
3888 
3889 	r = amdgpu_gfx_enable_kcq(adev);
3890 done:
3891 	return r;
3892 }
3893 
3894 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3895 {
3896 	int r, i;
3897 	struct amdgpu_ring *ring;
3898 
3899 	if (!(adev->flags & AMD_IS_APU))
3900 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3901 
3902 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3903 		if (adev->gfx.num_gfx_rings) {
3904 			/* legacy firmware loading */
3905 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3906 			if (r)
3907 				return r;
3908 		}
3909 
3910 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3911 		if (r)
3912 			return r;
3913 	}
3914 
3915 	r = gfx_v9_0_kiq_resume(adev);
3916 	if (r)
3917 		return r;
3918 
3919 	if (adev->gfx.num_gfx_rings) {
3920 		r = gfx_v9_0_cp_gfx_resume(adev);
3921 		if (r)
3922 			return r;
3923 	}
3924 
3925 	r = gfx_v9_0_kcq_resume(adev);
3926 	if (r)
3927 		return r;
3928 
3929 	if (adev->gfx.num_gfx_rings) {
3930 		ring = &adev->gfx.gfx_ring[0];
3931 		r = amdgpu_ring_test_helper(ring);
3932 		if (r)
3933 			return r;
3934 	}
3935 
3936 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3937 		ring = &adev->gfx.compute_ring[i];
3938 		amdgpu_ring_test_helper(ring);
3939 	}
3940 
3941 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3942 
3943 	return 0;
3944 }
3945 
3946 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3947 {
3948 	u32 tmp;
3949 
3950 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3951 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3952 		return;
3953 
3954 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3955 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3956 				adev->df.hash_status.hash_64k);
3957 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3958 				adev->df.hash_status.hash_2m);
3959 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3960 				adev->df.hash_status.hash_1g);
3961 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3962 }
3963 
3964 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3965 {
3966 	if (adev->gfx.num_gfx_rings)
3967 		gfx_v9_0_cp_gfx_enable(adev, enable);
3968 	gfx_v9_0_cp_compute_enable(adev, enable);
3969 }
3970 
3971 static int gfx_v9_0_hw_init(void *handle)
3972 {
3973 	int r;
3974 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3975 
3976 	if (!amdgpu_sriov_vf(adev))
3977 		gfx_v9_0_init_golden_registers(adev);
3978 
3979 	gfx_v9_0_constants_init(adev);
3980 
3981 	gfx_v9_0_init_tcp_config(adev);
3982 
3983 	r = adev->gfx.rlc.funcs->resume(adev);
3984 	if (r)
3985 		return r;
3986 
3987 	r = gfx_v9_0_cp_resume(adev);
3988 	if (r)
3989 		return r;
3990 
3991 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3992 		gfx_v9_4_2_set_power_brake_sequence(adev);
3993 
3994 	return r;
3995 }
3996 
3997 static int gfx_v9_0_hw_fini(void *handle)
3998 {
3999 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4000 
4001 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4002 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4003 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4004 
4005 	/* with a RAS fatal error pending, the DF freeze and KCQ disable will fail */
4006 	if (!amdgpu_ras_intr_triggered())
4007 		/* disable the KCQ so the CPC does not keep touching memory that is no longer valid */
4008 		amdgpu_gfx_disable_kcq(adev);
4009 
4010 	if (amdgpu_sriov_vf(adev)) {
4011 		gfx_v9_0_cp_gfx_enable(adev, false);
4012 		/* wptr polling must be disabled for SRIOV once the hw is done;
4013 		 * otherwise the CPC engine may keep fetching a WB address that is
4014 		 * no longer valid after the sw teardown and trigger a DMAR read
4015 		 * error on the hypervisor side.
4016 		 */
4017 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4018 		return 0;
4019 	}
4020 
4021 	/* Use the deinitialize sequence from CAIL when unbinding the device from
4022 	 * the driver; otherwise the KIQ hangs when the device is bound back.
4023 	 */
4024 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4025 		mutex_lock(&adev->srbm_mutex);
4026 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4027 				adev->gfx.kiq.ring.pipe,
4028 				adev->gfx.kiq.ring.queue, 0);
4029 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4030 		soc15_grbm_select(adev, 0, 0, 0, 0);
4031 		mutex_unlock(&adev->srbm_mutex);
4032 	}
4033 
4034 	gfx_v9_0_cp_enable(adev, false);
4035 
4036 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4037 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4038 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4039 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4040 		return 0;
4041 	}
4042 
4043 	adev->gfx.rlc.funcs->stop(adev);
4044 	return 0;
4045 }
4046 
4047 static int gfx_v9_0_suspend(void *handle)
4048 {
4049 	return gfx_v9_0_hw_fini(handle);
4050 }
4051 
4052 static int gfx_v9_0_resume(void *handle)
4053 {
4054 	return gfx_v9_0_hw_init(handle);
4055 }
4056 
4057 static bool gfx_v9_0_is_idle(void *handle)
4058 {
4059 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4060 
4061 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4062 				GRBM_STATUS, GUI_ACTIVE))
4063 		return false;
4064 	else
4065 		return true;
4066 }
4067 
4068 static int gfx_v9_0_wait_for_idle(void *handle)
4069 {
4070 	unsigned i;
4071 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4072 
4073 	for (i = 0; i < adev->usec_timeout; i++) {
4074 		if (gfx_v9_0_is_idle(handle))
4075 			return 0;
4076 		udelay(1);
4077 	}
4078 	return -ETIMEDOUT;
4079 }
4080 
4081 static int gfx_v9_0_soft_reset(void *handle)
4082 {
4083 	u32 grbm_soft_reset = 0;
4084 	u32 tmp;
4085 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4086 
4087 	/* GRBM_STATUS */
4088 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4089 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4090 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4091 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4092 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4093 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4094 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4095 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4096 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4097 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4098 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4099 	}
4100 
4101 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4102 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4103 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4104 	}
4105 
4106 	/* GRBM_STATUS2 */
4107 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4108 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4109 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4110 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4111 
4112 
4113 	if (grbm_soft_reset) {
4114 		/* stop the rlc */
4115 		adev->gfx.rlc.funcs->stop(adev);
4116 
4117 		if (adev->gfx.num_gfx_rings)
4118 			/* Disable GFX parsing/prefetching */
4119 			gfx_v9_0_cp_gfx_enable(adev, false);
4120 
4121 		/* Disable MEC parsing/prefetching */
4122 		gfx_v9_0_cp_compute_enable(adev, false);
4123 
4124 		if (grbm_soft_reset) {
4125 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4126 			tmp |= grbm_soft_reset;
4127 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4128 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4129 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4130 
4131 			udelay(50);
4132 
4133 			tmp &= ~grbm_soft_reset;
4134 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4135 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4136 		}
4137 
4138 		/* Wait a little for things to settle down */
4139 		udelay(50);
4140 	}
4141 	return 0;
4142 }
4143 
4144 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4145 {
4146 	signed long r, cnt = 0;
4147 	unsigned long flags;
4148 	uint32_t seq, reg_val_offs = 0;
4149 	uint64_t value = 0;
4150 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4151 	struct amdgpu_ring *ring = &kiq->ring;
4152 
4153 	BUG_ON(!ring->funcs->emit_rreg);
4154 
4155 	spin_lock_irqsave(&kiq->ring_lock, flags);
4156 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4157 		pr_err("critical bug! too many kiq readers\n");
4158 		goto failed_unlock;
4159 	}
4160 	amdgpu_ring_alloc(ring, 32);
4161 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4162 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4163 				(5 << 8) |	/* dst: memory */
4164 				(1 << 16) |	/* count sel */
4165 				(1 << 20));	/* write confirm */
4166 	amdgpu_ring_write(ring, 0);
4167 	amdgpu_ring_write(ring, 0);
4168 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4169 				reg_val_offs * 4));
4170 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4171 				reg_val_offs * 4));
4172 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4173 	if (r)
4174 		goto failed_undo;
4175 
4176 	amdgpu_ring_commit(ring);
4177 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4178 
4179 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4180 
4181 	/* Don't keep waiting in the GPU-reset case, since that can block
4182 	 * the gpu_recover() routine forever: this KIQ register read may be
4183 	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will
4184 	 * never return if we keep waiting here, which leaves gpu_recover()
4185 	 * hanging.
4186 	 *
4187 	 * Also don't keep waiting when called from IRQ context.
4188 	 */
4189 	if (r < 1 && (amdgpu_in_reset(adev)))
4190 		goto failed_kiq_read;
4191 
4192 	might_sleep();
4193 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4194 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4195 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4196 	}
4197 
4198 	if (cnt > MAX_KIQ_REG_TRY)
4199 		goto failed_kiq_read;
4200 
4201 	mb();
4202 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4203 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4204 	amdgpu_device_wb_free(adev, reg_val_offs);
4205 	return value;
4206 
4207 failed_undo:
4208 	amdgpu_ring_undo(ring);
4209 failed_unlock:
4210 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4211 failed_kiq_read:
4212 	if (reg_val_offs)
4213 		amdgpu_device_wb_free(adev, reg_val_offs);
4214 	pr_err("failed to read gpu clock\n");
4215 	return ~0;
4216 }
4217 
4218 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4219 {
4220 	uint64_t clock, clock_lo, clock_hi, hi_check;
4221 
4222 	switch (adev->ip_versions[GC_HWIP][0]) {
4223 	case IP_VERSION(9, 3, 0):
4224 		preempt_disable();
4225 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4226 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4227 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4228 		/* The SMUIO TSC clock frequency is 100MHz, so the 32-bit lower half
4229 		 * carries over into the upper half roughly every 42 seconds.
4230 		 */
4231 		if (hi_check != clock_hi) {
4232 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4233 			clock_hi = hi_check;
4234 		}
4235 		preempt_enable();
4236 		clock = clock_lo | (clock_hi << 32ULL);
4237 		break;
4238 	default:
4239 		amdgpu_gfx_off_ctrl(adev, false);
4240 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4241 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4242 			clock = gfx_v9_0_kiq_read_clock(adev);
4243 		} else {
4244 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4245 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4246 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4247 		}
4248 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4249 		amdgpu_gfx_off_ctrl(adev, true);
4250 		break;
4251 	}
4252 	return clock;
4253 }
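/*
 * Illustrative sketch (not part of the driver): the hi/lo/hi read pattern used
 * above for the Renoir TSC.  Re-reading the upper half detects a 32-bit carry
 * between the two accesses; the register reads are abbreviated here.
 *
 *   hi = read(UPPER); lo = read(LOWER); hi2 = read(UPPER);
 *   if (hi2 != hi) {          // the lower half wrapped between the reads
 *           lo = read(LOWER); // take a fresh lower half that matches hi2
 *           hi = hi2;
 *   }
 *   clock = ((u64)hi << 32) | lo;
 */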
4254 
4255 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4256 					  uint32_t vmid,
4257 					  uint32_t gds_base, uint32_t gds_size,
4258 					  uint32_t gws_base, uint32_t gws_size,
4259 					  uint32_t oa_base, uint32_t oa_size)
4260 {
4261 	struct amdgpu_device *adev = ring->adev;
4262 
4263 	/* GDS Base */
4264 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4265 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4266 				   gds_base);
4267 
4268 	/* GDS Size */
4269 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4270 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4271 				   gds_size);
4272 
4273 	/* GWS */
4274 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4275 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4276 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4277 
4278 	/* OA */
4279 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4280 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4281 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4282 }
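/*
 * Illustrative sketch (not part of the driver): the OA mask arithmetic used in
 * gfx_v9_0_ring_emit_gds_switch() above.  The base/size values are
 * hypothetical.
 *
 *   oa_base = 4, oa_size = 3
 *   (1 << (oa_size + oa_base)) - (1 << oa_base)
 *           = (1 << 7) - (1 << 4)
 *           = 0x80 - 0x10
 *           = 0x70   // bits 4..6 set: oa_size contiguous bits from oa_base
 */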
4283 
4284 static const u32 vgpr_init_compute_shader[] =
4285 {
4286 	0xb07c0000, 0xbe8000ff,
4287 	0x000000f8, 0xbf110800,
4288 	0x7e000280, 0x7e020280,
4289 	0x7e040280, 0x7e060280,
4290 	0x7e080280, 0x7e0a0280,
4291 	0x7e0c0280, 0x7e0e0280,
4292 	0x80808800, 0xbe803200,
4293 	0xbf84fff5, 0xbf9c0000,
4294 	0xd28c0001, 0x0001007f,
4295 	0xd28d0001, 0x0002027e,
4296 	0x10020288, 0xb8810904,
4297 	0xb7814000, 0xd1196a01,
4298 	0x00000301, 0xbe800087,
4299 	0xbefc00c1, 0xd89c4000,
4300 	0x00020201, 0xd89cc080,
4301 	0x00040401, 0x320202ff,
4302 	0x00000800, 0x80808100,
4303 	0xbf84fff8, 0x7e020280,
4304 	0xbf810000, 0x00000000,
4305 };
4306 
4307 static const u32 sgpr_init_compute_shader[] =
4308 {
4309 	0xb07c0000, 0xbe8000ff,
4310 	0x0000005f, 0xbee50080,
4311 	0xbe812c65, 0xbe822c65,
4312 	0xbe832c65, 0xbe842c65,
4313 	0xbe852c65, 0xb77c0005,
4314 	0x80808500, 0xbf84fff8,
4315 	0xbe800080, 0xbf810000,
4316 };
4317 
4318 static const u32 vgpr_init_compute_shader_arcturus[] = {
4319 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4320 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4321 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4322 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4323 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4324 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4325 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4326 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4327 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4328 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4329 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4330 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4331 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4332 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4333 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4334 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4335 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4336 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4337 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4338 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4339 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4340 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4341 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4342 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4343 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4344 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4345 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4346 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4347 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4348 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4349 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4350 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4351 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4352 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4353 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4354 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4355 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4356 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4357 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4358 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4359 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4360 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4361 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4362 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4363 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4364 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4365 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4366 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4367 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4368 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4369 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4370 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4371 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4372 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4373 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4374 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4375 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4376 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4377 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4378 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4379 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4380 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4381 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4382 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4383 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4384 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4385 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4386 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4387 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4388 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4389 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4390 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4391 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4392 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4393 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4394 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4395 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4396 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4397 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4398 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4399 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4400 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4401 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4402 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4403 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4404 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4405 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4406 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4407 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4408 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4409 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4410 	0xbf84fff8, 0xbf810000,
4411 };
4412 
4413 /* When the register arrays below are changed, please update gpr_reg_size
4414   and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4415   to cover all gfx9 ASICs */
4416 static const struct soc15_reg_entry vgpr_init_regs[] = {
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4431 };
4432 
4433 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4448 };
4449 
4450 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4465 };
4466 
4467 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4482 };
4483 
4484 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4485    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4486    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4487    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4488    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4489    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4490    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4499    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4500    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4501    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4502    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4503    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4504    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4508    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4510    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4511    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4512    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4513    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4514    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4515    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4516    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4517    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4518 };
4519 
4520 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4521 {
4522 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4523 	int i, r;
4524 
4525 	/* only support when RAS is enabled */
4526 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4527 		return 0;
4528 
4529 	r = amdgpu_ring_alloc(ring, 7);
4530 	if (r) {
4531 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4532 			ring->name, r);
4533 		return r;
4534 	}
4535 
4536 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4537 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4538 
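	/* clear GDS by DMAing zeros over the whole VMID0 aperture: DST_SEL(1)
	 * selects GDS as the destination and SRC_SEL(2) takes the literal (zero)
	 * data words written below as the source
	 */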
4539 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4540 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4541 				PACKET3_DMA_DATA_DST_SEL(1) |
4542 				PACKET3_DMA_DATA_SRC_SEL(2) |
4543 				PACKET3_DMA_DATA_ENGINE(0)));
4544 	amdgpu_ring_write(ring, 0);
4545 	amdgpu_ring_write(ring, 0);
4546 	amdgpu_ring_write(ring, 0);
4547 	amdgpu_ring_write(ring, 0);
4548 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4549 				adev->gds.gds_size);
4550 
4551 	amdgpu_ring_commit(ring);
4552 
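	/* busy-wait until the CP has consumed the packet, i.e. the read pointer
	 * catches up with the write pointer
	 */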
4553 	for (i = 0; i < adev->usec_timeout; i++) {
4554 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4555 			break;
4556 		udelay(1);
4557 	}
4558 
4559 	if (i >= adev->usec_timeout)
4560 		r = -ETIMEDOUT;
4561 
4562 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4563 
4564 	return r;
4565 }
4566 
4567 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4568 {
4569 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4570 	struct amdgpu_ib ib;
4571 	struct dma_fence *f = NULL;
4572 	int r, i;
4573 	unsigned total_size, vgpr_offset, sgpr_offset;
4574 	u64 gpu_addr;
4575 
4576 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4577 						adev->gfx.config.max_cu_per_sh *
4578 						adev->gfx.config.max_sh_per_se;
4579 	int sgpr_work_group_size = 5;
4580 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4581 	int vgpr_init_shader_size;
4582 	const u32 *vgpr_init_shader_ptr;
4583 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4584 
4585 	/* only support when RAS is enabled */
4586 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4587 		return 0;
4588 
4589 	/* bail if the compute ring is not ready */
4590 	if (!ring->sched.ready)
4591 		return 0;
4592 
4593 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4594 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4595 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4596 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4597 	} else {
4598 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4599 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4600 		vgpr_init_regs_ptr = vgpr_init_regs;
4601 	}
4602 
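	/* IB layout: the command stream for the three dispatches comes first,
	 * followed by the (256-byte aligned) VGPR init shader and then the
	 * SGPR init shader
	 */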
4603 	total_size =
4604 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4605 	total_size +=
4606 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4607 	total_size +=
4608 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4609 	total_size = ALIGN(total_size, 256);
4610 	vgpr_offset = total_size;
4611 	total_size += ALIGN(vgpr_init_shader_size, 256);
4612 	sgpr_offset = total_size;
4613 	total_size += sizeof(sgpr_init_compute_shader);
4614 
4615 	/* allocate an indirect buffer to put the commands in */
4616 	memset(&ib, 0, sizeof(ib));
4617 	r = amdgpu_ib_get(adev, NULL, total_size,
4618 					AMDGPU_IB_POOL_DIRECT, &ib);
4619 	if (r) {
4620 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4621 		return r;
4622 	}
4623 
4624 	/* load the compute shaders */
4625 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4626 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4627 
4628 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4629 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4630 
4631 	/* init the ib length to 0 */
4632 	ib.length_dw = 0;
4633 
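	/* each of the three sections below programs the dispatch registers,
	 * points COMPUTE_PGM at the matching init shader, dispatches it and
	 * then waits with a CS partial flush
	 */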
4634 	/* VGPR */
4635 	/* write the register state for the compute dispatch */
4636 	for (i = 0; i < gpr_reg_size; i++) {
4637 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4638 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4639 								- PACKET3_SET_SH_REG_START;
4640 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4641 	}
4642 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4643 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4644 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4645 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4646 							- PACKET3_SET_SH_REG_START;
4647 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4648 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4649 
4650 	/* write dispatch packet */
4651 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4652 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4653 	ib.ptr[ib.length_dw++] = 1; /* y */
4654 	ib.ptr[ib.length_dw++] = 1; /* z */
4655 	ib.ptr[ib.length_dw++] =
4656 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4657 
4658 	/* write CS partial flush packet */
4659 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4660 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4661 
4662 	/* SGPR1 */
4663 	/* write the register state for the compute dispatch */
4664 	for (i = 0; i < gpr_reg_size; i++) {
4665 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4666 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4667 								- PACKET3_SET_SH_REG_START;
4668 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4669 	}
4670 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4671 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4672 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4673 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4674 							- PACKET3_SET_SH_REG_START;
4675 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4676 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4677 
4678 	/* write dispatch packet */
4679 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4680 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4681 	ib.ptr[ib.length_dw++] = 1; /* y */
4682 	ib.ptr[ib.length_dw++] = 1; /* z */
4683 	ib.ptr[ib.length_dw++] =
4684 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4685 
4686 	/* write CS partial flush packet */
4687 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4688 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4689 
4690 	/* SGPR2 */
4691 	/* write the register state for the compute dispatch */
4692 	for (i = 0; i < gpr_reg_size; i++) {
4693 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4694 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4695 								- PACKET3_SET_SH_REG_START;
4696 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4697 	}
4698 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4699 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4700 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4701 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4702 							- PACKET3_SET_SH_REG_START;
4703 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4704 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4705 
4706 	/* write dispatch packet */
4707 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4708 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4709 	ib.ptr[ib.length_dw++] = 1; /* y */
4710 	ib.ptr[ib.length_dw++] = 1; /* z */
4711 	ib.ptr[ib.length_dw++] =
4712 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4713 
4714 	/* write CS partial flush packet */
4715 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4716 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4717 
4718 	/* schedule the ib on the ring */
4719 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4720 	if (r) {
4721 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4722 		goto fail;
4723 	}
4724 
4725 	/* wait for the GPU to finish processing the IB */
4726 	r = dma_fence_wait(f, false);
4727 	if (r) {
4728 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4729 		goto fail;
4730 	}
4731 
4732 fail:
4733 	amdgpu_ib_free(adev, &ib, NULL);
4734 	dma_fence_put(f);
4735 
4736 	return r;
4737 }
4738 
4739 static int gfx_v9_0_early_init(void *handle)
4740 {
4741 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4742 
4743 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4744 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4745 		adev->gfx.num_gfx_rings = 0;
4746 	else
4747 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4748 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4749 					  AMDGPU_MAX_COMPUTE_RINGS);
4750 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4751 	gfx_v9_0_set_ring_funcs(adev);
4752 	gfx_v9_0_set_irq_funcs(adev);
4753 	gfx_v9_0_set_gds_init(adev);
4754 	gfx_v9_0_set_rlc_funcs(adev);
4755 
4756 	/* init rlcg reg access ctrl */
4757 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4758 
4759 	return 0;
4760 }
4761 
4762 static int gfx_v9_0_ecc_late_init(void *handle)
4763 {
4764 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4765 	int r;
4766 
4767 	/*
4768 	 * Temporary workaround: on several cards the CP firmware fails to
4769 	 * update the read pointer while CPDMA is writing the clearing
4770 	 * operation to GDS in the suspend/resume sequence, so limit
4771 	 * this operation to the cold boot sequence.
4772 	 */
4773 	if ((!adev->in_suspend) &&
4774 	    (adev->gds.gds_size)) {
4775 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4776 		if (r)
4777 			return r;
4778 	}
4779 
4780 	/* requires IBs so do in late init after IB pool is initialized */
4781 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4782 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4783 	else
4784 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4785 
4786 	if (r)
4787 		return r;
4788 
4789 	if (adev->gfx.ras &&
4790 	    adev->gfx.ras->enable_watchdog_timer)
4791 		adev->gfx.ras->enable_watchdog_timer(adev);
4792 
4793 	return 0;
4794 }
4795 
4796 static int gfx_v9_0_late_init(void *handle)
4797 {
4798 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4799 	int r;
4800 
4801 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4802 	if (r)
4803 		return r;
4804 
4805 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4806 	if (r)
4807 		return r;
4808 
4809 	r = gfx_v9_0_ecc_late_init(handle);
4810 	if (r)
4811 		return r;
4812 
4813 	return 0;
4814 }
4815 
4816 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4817 {
4818 	uint32_t rlc_setting;
4819 
4820 	/* if RLC is not enabled, do nothing */
4821 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4822 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4823 		return false;
4824 
4825 	return true;
4826 }
4827 
4828 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4829 {
4830 	uint32_t data;
4831 	unsigned i;
4832 
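	/* request safe mode: raise CMD together with the enter-safe-mode message,
	 * then poll below until the RLC acknowledges by clearing CMD
	 */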
4833 	data = RLC_SAFE_MODE__CMD_MASK;
4834 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4835 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4836 
4837 	/* wait for RLC_SAFE_MODE */
4838 	for (i = 0; i < adev->usec_timeout; i++) {
4839 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4840 			break;
4841 		udelay(1);
4842 	}
4843 }
4844 
4845 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4846 {
4847 	uint32_t data;
4848 
4849 	data = RLC_SAFE_MODE__CMD_MASK;
4850 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4851 }
4852 
4853 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4854 						bool enable)
4855 {
4856 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4857 
4858 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4859 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4860 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4861 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4862 	} else {
4863 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4864 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4865 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4866 	}
4867 
4868 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4869 }
4870 
4871 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4872 						bool enable)
4873 {
4874 	/* TODO: double check if we need to perform under safe mode */
4875 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4876 
4877 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4878 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4879 	else
4880 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4881 
4882 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4883 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4884 	else
4885 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4886 
4887 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4888 }
4889 
4890 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4891 						      bool enable)
4892 {
4893 	uint32_t data, def;
4894 
4895 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4896 
4897 	/* It is disabled by HW by default */
4898 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4899 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4900 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4901 
4902 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4903 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4904 
4905 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4906 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4907 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4908 
4909 		/* only for Vega10 & Raven1 */
4910 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4911 
4912 		if (def != data)
4913 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4914 
4915 		/* MGLS is a global flag to control all MGLS in GFX */
4916 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4917 			/* 2 - RLC memory Light sleep */
4918 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4919 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4920 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4921 				if (def != data)
4922 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4923 			}
4924 			/* 3 - CP memory Light sleep */
4925 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4926 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4927 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4928 				if (def != data)
4929 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4930 			}
4931 		}
4932 	} else {
4933 		/* 1 - MGCG_OVERRIDE */
4934 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4935 
4936 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4937 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4938 
4939 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4940 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4941 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4942 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4943 
4944 		if (def != data)
4945 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4946 
4947 		/* 2 - disable MGLS in RLC */
4948 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4949 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4950 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4951 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4952 		}
4953 
4954 		/* 3 - disable MGLS in CP */
4955 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4956 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4957 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4958 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4959 		}
4960 	}
4961 
4962 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4963 }
4964 
4965 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4966 					   bool enable)
4967 {
4968 	uint32_t data, def;
4969 
4970 	if (!adev->gfx.num_gfx_rings)
4971 		return;
4972 
4973 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4974 
4975 	/* Enable 3D CGCG/CGLS */
4976 	if (enable) {
4977 		/* write cmd to clear cgcg/cgls ov */
4978 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4979 		/* unset CGCG override */
4980 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4981 		/* update CGCG and CGLS override bits */
4982 		if (def != data)
4983 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4984 
4985 		/* enable 3Dcgcg FSM(0x0000363f) */
4986 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4987 
4988 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4989 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4990 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4991 		else
4992 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4993 
4994 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4995 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4996 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4997 		if (def != data)
4998 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4999 
5000 		/* set IDLE_POLL_COUNT(0x00900100) */
5001 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5002 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5003 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5004 		if (def != data)
5005 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5006 	} else {
5007 		/* Disable CGCG/CGLS */
5008 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5009 		/* disable cgcg, cgls should be disabled */
5010 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5011 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5012 		/* disable cgcg and cgls in FSM */
5013 		if (def != data)
5014 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5015 	}
5016 
5017 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5018 }
5019 
5020 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5021 						      bool enable)
5022 {
5023 	uint32_t def, data;
5024 
5025 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5026 
5027 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5028 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5029 		/* unset CGCG override */
5030 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5031 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5032 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5033 		else
5034 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5035 		/* update CGCG and CGLS override bits */
5036 		if (def != data)
5037 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5038 
5039 		/* enable cgcg FSM(0x0000363F) */
5040 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5041 
5042 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5043 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5044 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5045 		else
5046 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5047 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5048 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5049 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5050 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5051 		if (def != data)
5052 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5053 
5054 		/* set IDLE_POLL_COUNT(0x00900100) */
5055 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5056 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5057 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5058 		if (def != data)
5059 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5060 	} else {
5061 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5062 		/* reset CGCG/CGLS bits */
5063 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5064 		/* disable cgcg and cgls in FSM */
5065 		if (def != data)
5066 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5067 	}
5068 
5069 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5070 }
5071 
5072 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5073 					    bool enable)
5074 {
5075 	if (enable) {
5076 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5077 		 * ===  MGCG + MGLS ===
5078 		 */
5079 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5080 		/* ===  CGCG /CGLS for GFX 3D Only === */
5081 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5082 		/* ===  CGCG + CGLS === */
5083 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5084 	} else {
5085 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5086 		 * ===  CGCG + CGLS ===
5087 		 */
5088 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5089 		/* ===  CGCG /CGLS for GFX 3D Only === */
5090 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5091 		/* ===  MGCG + MGLS === */
5092 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5093 	}
5094 	return 0;
5095 }
5096 
5097 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5098 {
5099 	u32 reg, data;
5100 
5101 	amdgpu_gfx_off_ctrl(adev, false);
5102 
5103 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
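	/* with a single VF under SR-IOV, access the register directly instead of
	 * routing the access through the KIQ
	 */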
5104 	if (amdgpu_sriov_is_pp_one_vf(adev))
5105 		data = RREG32_NO_KIQ(reg);
5106 	else
5107 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5108 
5109 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5110 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5111 
5112 	if (amdgpu_sriov_is_pp_one_vf(adev))
5113 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5114 	else
5115 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5116 
5117 	amdgpu_gfx_off_ctrl(adev, true);
5118 }
5119 
5120 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5121 					uint32_t offset,
5122 					struct soc15_reg_rlcg *entries, int arr_size)
5123 {
5124 	int i;
5125 	uint32_t reg;
5126 
5127 	if (!entries)
5128 		return false;
5129 
5130 	for (i = 0; i < arr_size; i++) {
5131 		const struct soc15_reg_rlcg *entry;
5132 
5133 		entry = &entries[i];
5134 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5135 		if (offset == reg)
5136 			return true;
5137 	}
5138 
5139 	return false;
5140 }
5141 
5142 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5143 {
5144 	return gfx_v9_0_check_rlcg_range(adev, offset,
5145 					(void *)rlcg_access_gc_9_0,
5146 					ARRAY_SIZE(rlcg_access_gc_9_0));
5147 }
5148 
5149 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5150 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5151 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5152 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5153 	.init = gfx_v9_0_rlc_init,
5154 	.get_csb_size = gfx_v9_0_get_csb_size,
5155 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5156 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5157 	.resume = gfx_v9_0_rlc_resume,
5158 	.stop = gfx_v9_0_rlc_stop,
5159 	.reset = gfx_v9_0_rlc_reset,
5160 	.start = gfx_v9_0_rlc_start,
5161 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5162 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5163 };
5164 
5165 static int gfx_v9_0_set_powergating_state(void *handle,
5166 					  enum amd_powergating_state state)
5167 {
5168 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5169 	bool enable = (state == AMD_PG_STATE_GATE);
5170 
5171 	switch (adev->ip_versions[GC_HWIP][0]) {
5172 	case IP_VERSION(9, 2, 2):
5173 	case IP_VERSION(9, 1, 0):
5174 	case IP_VERSION(9, 3, 0):
5175 		if (!enable)
5176 			amdgpu_gfx_off_ctrl(adev, false);
5177 
5178 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5179 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5180 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5181 		} else {
5182 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5183 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5184 		}
5185 
5186 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5187 			gfx_v9_0_enable_cp_power_gating(adev, true);
5188 		else
5189 			gfx_v9_0_enable_cp_power_gating(adev, false);
5190 
5191 		/* update gfx cgpg state */
5192 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5193 
5194 		/* update mgcg state */
5195 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5196 
5197 		if (enable)
5198 			amdgpu_gfx_off_ctrl(adev, true);
5199 		break;
5200 	case IP_VERSION(9, 2, 1):
5201 		amdgpu_gfx_off_ctrl(adev, enable);
5202 		break;
5203 	default:
5204 		break;
5205 	}
5206 
5207 	return 0;
5208 }
5209 
5210 static int gfx_v9_0_set_clockgating_state(void *handle,
5211 					  enum amd_clockgating_state state)
5212 {
5213 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5214 
5215 	if (amdgpu_sriov_vf(adev))
5216 		return 0;
5217 
5218 	switch (adev->ip_versions[GC_HWIP][0]) {
5219 	case IP_VERSION(9, 0, 1):
5220 	case IP_VERSION(9, 2, 1):
5221 	case IP_VERSION(9, 4, 0):
5222 	case IP_VERSION(9, 2, 2):
5223 	case IP_VERSION(9, 1, 0):
5224 	case IP_VERSION(9, 4, 1):
5225 	case IP_VERSION(9, 3, 0):
5226 	case IP_VERSION(9, 4, 2):
5227 		gfx_v9_0_update_gfx_clock_gating(adev,
5228 						 state == AMD_CG_STATE_GATE);
5229 		break;
5230 	default:
5231 		break;
5232 	}
5233 	return 0;
5234 }
5235 
5236 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5237 {
5238 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239 	int data;
5240 
5241 	if (amdgpu_sriov_vf(adev))
5242 		*flags = 0;
5243 
5244 	/* AMD_CG_SUPPORT_GFX_MGCG */
5245 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5246 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5247 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5248 
5249 	/* AMD_CG_SUPPORT_GFX_CGCG */
5250 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5251 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5252 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5253 
5254 	/* AMD_CG_SUPPORT_GFX_CGLS */
5255 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5256 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5257 
5258 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5259 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5260 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5261 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5262 
5263 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5264 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5265 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5266 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5267 
5268 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5269 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5270 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5271 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5272 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5273 
5274 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5275 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5276 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5277 	}
5278 }
5279 
5280 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5281 {
5282 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5283 }
5284 
5285 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5286 {
5287 	struct amdgpu_device *adev = ring->adev;
5288 	u64 wptr;
5289 
5290 	/* XXX check if swapping is necessary on BE */
5291 	if (ring->use_doorbell) {
5292 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5293 	} else {
5294 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5295 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5296 	}
5297 
5298 	return wptr;
5299 }
5300 
5301 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5302 {
5303 	struct amdgpu_device *adev = ring->adev;
5304 
5305 	if (ring->use_doorbell) {
5306 		/* XXX check if swapping is necessary on BE */
5307 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5308 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5309 	} else {
5310 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5311 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5312 	}
5313 }
5314 
5315 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5316 {
5317 	struct amdgpu_device *adev = ring->adev;
5318 	u32 ref_and_mask, reg_mem_engine;
5319 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5320 
5321 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5322 		switch (ring->me) {
5323 		case 1:
5324 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5325 			break;
5326 		case 2:
5327 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5328 			break;
5329 		default:
5330 			return;
5331 		}
5332 		reg_mem_engine = 0;
5333 	} else {
5334 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5335 		reg_mem_engine = 1; /* pfp */
5336 	}
5337 
5338 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5339 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5340 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5341 			      ref_and_mask, ref_and_mask, 0x20);
5342 }
5343 
5344 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5345 					struct amdgpu_job *job,
5346 					struct amdgpu_ib *ib,
5347 					uint32_t flags)
5348 {
5349 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5350 	u32 header, control = 0;
5351 
5352 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5353 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5354 	else
5355 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5356 
5357 	control |= ib->length_dw | (vmid << 24);
5358 
5359 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5360 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5361 
5362 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5363 			gfx_v9_0_ring_emit_de_meta(ring);
5364 	}
5365 
5366 	amdgpu_ring_write(ring, header);
5367 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5368 	amdgpu_ring_write(ring,
5369 #ifdef __BIG_ENDIAN
5370 		(2 << 0) |
5371 #endif
5372 		lower_32_bits(ib->gpu_addr));
5373 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5374 	amdgpu_ring_write(ring, control);
5375 }
5376 
5377 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5378 					  struct amdgpu_job *job,
5379 					  struct amdgpu_ib *ib,
5380 					  uint32_t flags)
5381 {
5382 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5383 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5384 
5385 	/* Currently, there is a high likelihood of a wave ID mismatch
5386 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5387 	 * different wave IDs than the GDS expects. This situation happens
5388 	 * randomly when at least 5 compute pipes use GDS ordered append.
5389 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5390 	 * Those are probably bugs somewhere else in the kernel driver.
5391 	 *
5392 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5393 	 * GDS to 0 for this ring (me/pipe).
5394 	 */
5395 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5396 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5397 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5398 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5399 	}
5400 
5401 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5402 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5403 	amdgpu_ring_write(ring,
5404 #ifdef __BIG_ENDIAN
5405 				(2 << 0) |
5406 #endif
5407 				lower_32_bits(ib->gpu_addr));
5408 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5409 	amdgpu_ring_write(ring, control);
5410 }
5411 
5412 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5413 				     u64 seq, unsigned flags)
5414 {
5415 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5416 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5417 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5418 
5419 	/* RELEASE_MEM - flush caches, send int */
5420 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5421 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5422 					       EOP_TC_NC_ACTION_EN) :
5423 					      (EOP_TCL1_ACTION_EN |
5424 					       EOP_TC_ACTION_EN |
5425 					       EOP_TC_WB_ACTION_EN |
5426 					       EOP_TC_MD_ACTION_EN)) |
5427 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5428 				 EVENT_INDEX(5)));
5429 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5430 
5431 	/*
5432 	 * the address should be Qword aligned for a 64bit write, and Dword
5433 	 * aligned if only the low 32bit data is sent (the high data is discarded)
5434 	 */
5435 	if (write64bit)
5436 		BUG_ON(addr & 0x7);
5437 	else
5438 		BUG_ON(addr & 0x3);
5439 	amdgpu_ring_write(ring, lower_32_bits(addr));
5440 	amdgpu_ring_write(ring, upper_32_bits(addr));
5441 	amdgpu_ring_write(ring, lower_32_bits(seq));
5442 	amdgpu_ring_write(ring, upper_32_bits(seq));
5443 	amdgpu_ring_write(ring, 0);
5444 }
5445 
5446 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5447 {
5448 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5449 	uint32_t seq = ring->fence_drv.sync_seq;
5450 	uint64_t addr = ring->fence_drv.gpu_addr;
5451 
5452 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5453 			      lower_32_bits(addr), upper_32_bits(addr),
5454 			      seq, 0xffffffff, 4);
5455 }
5456 
5457 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5458 					unsigned vmid, uint64_t pd_addr)
5459 {
5460 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5461 
5462 	/* compute doesn't have PFP */
5463 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5464 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5465 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5466 		amdgpu_ring_write(ring, 0x0);
5467 	}
5468 }
5469 
5470 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5471 {
5472 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5473 }
5474 
5475 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5476 {
5477 	u64 wptr;
5478 
5479 	/* XXX check if swapping is necessary on BE */
5480 	if (ring->use_doorbell)
5481 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5482 	else
5483 		BUG();
5484 	return wptr;
5485 }
5486 
5487 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5488 {
5489 	struct amdgpu_device *adev = ring->adev;
5490 
5491 	/* XXX check if swapping is necessary on BE */
5492 	if (ring->use_doorbell) {
5493 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5494 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5495 	} else {
5496 		BUG(); /* only DOORBELL method supported on gfx9 now */
5497 	}
5498 }
5499 
5500 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5501 					 u64 seq, unsigned int flags)
5502 {
5503 	struct amdgpu_device *adev = ring->adev;
5504 
5505 	/* we only allocate 32bit for each seq wb address */
5506 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5507 
5508 	/* write fence seq to the "addr" */
5509 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5510 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5511 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5512 	amdgpu_ring_write(ring, lower_32_bits(addr));
5513 	amdgpu_ring_write(ring, upper_32_bits(addr));
5514 	amdgpu_ring_write(ring, lower_32_bits(seq));
5515 
5516 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5517 		/* set register to trigger INT */
5518 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5519 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5520 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5521 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5522 		amdgpu_ring_write(ring, 0);
5523 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5524 	}
5525 }
5526 
5527 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5528 {
5529 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5530 	amdgpu_ring_write(ring, 0);
5531 }
5532 
5533 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5534 {
5535 	struct v9_ce_ib_state ce_payload = {0};
5536 	uint64_t csa_addr;
5537 	int cnt;
5538 
5539 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5540 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5541 
5542 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5543 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5544 				 WRITE_DATA_DST_SEL(8) |
5545 				 WR_CONFIRM) |
5546 				 WRITE_DATA_CACHE_POLICY(0));
5547 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5548 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5549 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5550 }
5551 
5552 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5553 {
5554 	struct v9_de_ib_state de_payload = {0};
5555 	uint64_t csa_addr, gds_addr;
5556 	int cnt;
5557 
5558 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5559 	gds_addr = csa_addr + 4096;
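	/* the GDS backup area is placed one 4 KiB page past the CSA */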
5560 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5561 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5562 
5563 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5564 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5565 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5566 				 WRITE_DATA_DST_SEL(8) |
5567 				 WR_CONFIRM) |
5568 				 WRITE_DATA_CACHE_POLICY(0));
5569 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5570 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5571 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5572 }
5573 
5574 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5575 				   bool secure)
5576 {
5577 	uint32_t v = secure ? FRAME_TMZ : 0;
5578 
5579 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5580 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5581 }
5582 
5583 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5584 {
5585 	uint32_t dw2 = 0;
5586 
5587 	if (amdgpu_sriov_vf(ring->adev))
5588 		gfx_v9_0_ring_emit_ce_meta(ring);
5589 
5590 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5591 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5592 		/* set load_global_config & load_global_uconfig */
5593 		dw2 |= 0x8001;
5594 		/* set load_cs_sh_regs */
5595 		dw2 |= 0x01000000;
5596 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5597 		dw2 |= 0x10002;
5598 
5599 		/* set load_ce_ram if a preamble is presented */
5600 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5601 			dw2 |= 0x10000000;
5602 	} else {
5603 		/* still load_ce_ram if this is the first time a preamble is presented,
5604 		 * even though no context switch happens.
5605 		 */
5606 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5607 			dw2 |= 0x10000000;
5608 	}
5609 
5610 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5611 	amdgpu_ring_write(ring, dw2);
5612 	amdgpu_ring_write(ring, 0);
5613 }
5614 
5615 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5616 {
5617 	unsigned ret;
5618 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5619 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5620 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5621 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5622 	ret = ring->wptr & ring->buf_mask;
5623 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5624 	return ret;
5625 }
5626 
5627 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5628 {
5629 	unsigned cur;
5630 	BUG_ON(offset > ring->buf_mask);
5631 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5632 
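	/* patch the placeholder with the number of dwords to skip: the distance
	 * from the COND_EXEC payload to the current write pointer, accounting
	 * for ring buffer wrap-around
	 */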
5633 	cur = (ring->wptr & ring->buf_mask) - 1;
5634 	if (likely(cur > offset))
5635 		ring->ring[offset] = cur - offset;
5636 	else
5637 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5638 }
5639 
5640 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5641 				    uint32_t reg_val_offs)
5642 {
5643 	struct amdgpu_device *adev = ring->adev;
5644 
5645 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5646 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5647 				(5 << 8) |	/* dst: memory */
5648 				(1 << 20));	/* write confirm */
5649 	amdgpu_ring_write(ring, reg);
5650 	amdgpu_ring_write(ring, 0);
5651 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5652 				reg_val_offs * 4));
5653 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5654 				reg_val_offs * 4));
5655 }
5656 
5657 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5658 				    uint32_t val)
5659 {
5660 	uint32_t cmd = 0;
5661 
5662 	switch (ring->funcs->type) {
5663 	case AMDGPU_RING_TYPE_GFX:
5664 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5665 		break;
5666 	case AMDGPU_RING_TYPE_KIQ:
5667 		cmd = (1 << 16); /* no inc addr */
5668 		break;
5669 	default:
5670 		cmd = WR_CONFIRM;
5671 		break;
5672 	}
5673 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5674 	amdgpu_ring_write(ring, cmd);
5675 	amdgpu_ring_write(ring, reg);
5676 	amdgpu_ring_write(ring, 0);
5677 	amdgpu_ring_write(ring, val);
5678 }
5679 
5680 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5681 					uint32_t val, uint32_t mask)
5682 {
5683 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5684 }
5685 
5686 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5687 						  uint32_t reg0, uint32_t reg1,
5688 						  uint32_t ref, uint32_t mask)
5689 {
5690 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5691 	struct amdgpu_device *adev = ring->adev;
5692 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5693 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5694 
5695 	if (fw_version_ok)
5696 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5697 				      ref, mask, 0x20);
5698 	else
5699 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5700 							   ref, mask);
5701 }
5702 
5703 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5704 {
5705 	struct amdgpu_device *adev = ring->adev;
5706 	uint32_t value = 0;
5707 
5708 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5709 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5710 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5711 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5712 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5713 }
5714 
5715 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5716 						 enum amdgpu_interrupt_state state)
5717 {
5718 	switch (state) {
5719 	case AMDGPU_IRQ_STATE_DISABLE:
5720 	case AMDGPU_IRQ_STATE_ENABLE:
5721 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5722 			       TIME_STAMP_INT_ENABLE,
5723 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5724 		break;
5725 	default:
5726 		break;
5727 	}
5728 }
5729 
5730 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5731 						     int me, int pipe,
5732 						     enum amdgpu_interrupt_state state)
5733 {
5734 	u32 mec_int_cntl, mec_int_cntl_reg;
5735 
5736 	/*
5737 	 * amdgpu controls only the first MEC. That's why this function only
5738 	 * handles the setting of interrupts for this specific MEC. All other
5739 	 * pipes' interrupts are set by amdkfd.
5740 	 */
5741 
5742 	if (me == 1) {
5743 		switch (pipe) {
5744 		case 0:
5745 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5746 			break;
5747 		case 1:
5748 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5749 			break;
5750 		case 2:
5751 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5752 			break;
5753 		case 3:
5754 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5755 			break;
5756 		default:
5757 			DRM_DEBUG("invalid pipe %d\n", pipe);
5758 			return;
5759 		}
5760 	} else {
5761 		DRM_DEBUG("invalid me %d\n", me);
5762 		return;
5763 	}
5764 
5765 	switch (state) {
5766 	case AMDGPU_IRQ_STATE_DISABLE:
5767 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5768 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5769 					     TIME_STAMP_INT_ENABLE, 0);
5770 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5771 		break;
5772 	case AMDGPU_IRQ_STATE_ENABLE:
5773 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5774 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5775 					     TIME_STAMP_INT_ENABLE, 1);
5776 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5777 		break;
5778 	default:
5779 		break;
5780 	}
5781 }
5782 
5783 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5784 					     struct amdgpu_irq_src *source,
5785 					     unsigned type,
5786 					     enum amdgpu_interrupt_state state)
5787 {
5788 	switch (state) {
5789 	case AMDGPU_IRQ_STATE_DISABLE:
5790 	case AMDGPU_IRQ_STATE_ENABLE:
5791 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5792 			       PRIV_REG_INT_ENABLE,
5793 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5794 		break;
5795 	default:
5796 		break;
5797 	}
5798 
5799 	return 0;
5800 }
5801 
5802 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5803 					      struct amdgpu_irq_src *source,
5804 					      unsigned type,
5805 					      enum amdgpu_interrupt_state state)
5806 {
5807 	switch (state) {
5808 	case AMDGPU_IRQ_STATE_DISABLE:
5809 	case AMDGPU_IRQ_STATE_ENABLE:
5810 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5811 			       PRIV_INSTR_INT_ENABLE,
5812 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5813 		break;
5814 	default:
5815 		break;
5816 	}
5817 
5818 	return 0;
5819 }
5820 
5821 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5822 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5823 			CP_ECC_ERROR_INT_ENABLE, 1)
5824 
5825 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5826 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5827 			CP_ECC_ERROR_INT_ENABLE, 0)
5828 
5829 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5830 					      struct amdgpu_irq_src *source,
5831 					      unsigned type,
5832 					      enum amdgpu_interrupt_state state)
5833 {
5834 	switch (state) {
5835 	case AMDGPU_IRQ_STATE_DISABLE:
5836 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5837 				CP_ECC_ERROR_INT_ENABLE, 0);
5838 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5839 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5840 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5841 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5842 		break;
5843 
5844 	case AMDGPU_IRQ_STATE_ENABLE:
5845 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5846 				CP_ECC_ERROR_INT_ENABLE, 1);
5847 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5848 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5849 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5850 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5851 		break;
5852 	default:
5853 		break;
5854 	}
5855 
5856 	return 0;
5857 }
5858 
5859 
5860 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5861 					    struct amdgpu_irq_src *src,
5862 					    unsigned type,
5863 					    enum amdgpu_interrupt_state state)
5864 {
5865 	switch (type) {
5866 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5867 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5868 		break;
5869 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5870 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5871 		break;
5872 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5873 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5874 		break;
5875 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5876 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5877 		break;
5878 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5879 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5880 		break;
5881 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5882 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5883 		break;
5884 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5885 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5886 		break;
5887 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5888 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5889 		break;
5890 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5891 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5892 		break;
5893 	default:
5894 		break;
5895 	}
5896 	return 0;
5897 }
5898 
5899 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5900 			    struct amdgpu_irq_src *source,
5901 			    struct amdgpu_iv_entry *entry)
5902 {
5903 	int i;
5904 	u8 me_id, pipe_id, queue_id;
5905 	struct amdgpu_ring *ring;
5906 
5907 	DRM_DEBUG("IH: CP EOP\n");
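	/* the IV ring_id encodes pipe in bits [1:0], me in [3:2] and queue in [6:4] */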
5908 	me_id = (entry->ring_id & 0x0c) >> 2;
5909 	pipe_id = (entry->ring_id & 0x03) >> 0;
5910 	queue_id = (entry->ring_id & 0x70) >> 4;
5911 
5912 	switch (me_id) {
5913 	case 0:
5914 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5915 		break;
5916 	case 1:
5917 	case 2:
5918 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5919 			ring = &adev->gfx.compute_ring[i];
5920 			/* Per-queue interrupt is supported for MEC starting from VI.
5921 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5922 			 */
5923 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5924 				amdgpu_fence_process(ring);
5925 		}
5926 		break;
5927 	}
5928 	return 0;
5929 }
5930 
5931 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5932 			   struct amdgpu_iv_entry *entry)
5933 {
5934 	u8 me_id, pipe_id, queue_id;
5935 	struct amdgpu_ring *ring;
5936 	int i;
5937 
5938 	me_id = (entry->ring_id & 0x0c) >> 2;
5939 	pipe_id = (entry->ring_id & 0x03) >> 0;
5940 	queue_id = (entry->ring_id & 0x70) >> 4;
5941 
5942 	switch (me_id) {
5943 	case 0:
5944 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5945 		break;
5946 	case 1:
5947 	case 2:
5948 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5949 			ring = &adev->gfx.compute_ring[i];
5950 			if (ring->me == me_id && ring->pipe == pipe_id &&
5951 			    ring->queue == queue_id)
5952 				drm_sched_fault(&ring->sched);
5953 		}
5954 		break;
5955 	}
5956 }
5957 
5958 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5959 				 struct amdgpu_irq_src *source,
5960 				 struct amdgpu_iv_entry *entry)
5961 {
5962 	DRM_ERROR("Illegal register access in command stream\n");
5963 	gfx_v9_0_fault(adev, entry);
5964 	return 0;
5965 }
5966 
5967 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5968 				  struct amdgpu_irq_src *source,
5969 				  struct amdgpu_iv_entry *entry)
5970 {
5971 	DRM_ERROR("Illegal instruction in command stream\n");
5972 	gfx_v9_0_fault(adev, entry);
5973 	return 0;
5974 }
5975 
5976 
5977 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5978 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5979 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5980 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5981 	},
5982 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5983 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5984 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5985 	},
5986 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5987 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5988 	  0, 0
5989 	},
5990 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5991 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5992 	  0, 0
5993 	},
5994 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5995 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5996 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5997 	},
5998 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5999 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6000 	  0, 0
6001 	},
6002 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6003 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6004 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6005 	},
6006 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6007 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6008 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6009 	},
6010 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6011 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6012 	  0, 0
6013 	},
6014 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6015 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6016 	  0, 0
6017 	},
6018 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6019 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6020 	  0, 0
6021 	},
6022 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6023 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6024 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6025 	},
6026 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6027 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6028 	  0, 0
6029 	},
6030 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6031 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6032 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6033 	},
6034 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6035 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6036 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6037 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6038 	},
6039 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6040 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6041 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6042 	  0, 0
6043 	},
6044 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6045 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6046 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6047 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6048 	},
6049 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6050 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6051 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6052 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6053 	},
6054 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6055 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6056 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6057 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6058 	},
6059 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6060 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6061 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6062 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6063 	},
6064 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6065 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6066 	  0, 0
6067 	},
6068 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6069 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6070 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6071 	},
6072 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6073 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6074 	  0, 0
6075 	},
6076 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6077 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6078 	  0, 0
6079 	},
6080 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6081 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6082 	  0, 0
6083 	},
6084 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6085 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6086 	  0, 0
6087 	},
6088 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6089 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6090 	  0, 0
6091 	},
6092 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6093 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6094 	  0, 0
6095 	},
6096 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6097 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6098 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6099 	},
6100 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6101 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6102 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6103 	},
6104 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6105 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6106 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6107 	},
6108 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6109 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6110 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6111 	},
6112 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6113 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6114 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6115 	},
6116 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6117 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6118 	  0, 0
6119 	},
6120 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6121 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6122 	  0, 0
6123 	},
6124 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6125 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6126 	  0, 0
6127 	},
6128 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6129 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6130 	  0, 0
6131 	},
6132 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6133 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6134 	  0, 0
6135 	},
6136 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6137 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6138 	  0, 0
6139 	},
6140 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6141 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6142 	  0, 0
6143 	},
6144 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6145 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6146 	  0, 0
6147 	},
6148 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6149 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6150 	  0, 0
6151 	},
6152 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6153 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6154 	  0, 0
6155 	},
6156 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6157 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6158 	  0, 0
6159 	},
6160 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6161 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6162 	  0, 0
6163 	},
6164 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6165 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6166 	  0, 0
6167 	},
6168 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6169 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6170 	  0, 0
6171 	},
6172 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6173 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6174 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6175 	},
6176 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6177 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6178 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6179 	},
6180 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6181 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6182 	  0, 0
6183 	},
6184 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6185 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6186 	  0, 0
6187 	},
6188 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6189 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6190 	  0, 0
6191 	},
6192 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6193 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6194 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6195 	},
6196 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6197 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6198 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6199 	},
6200 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6201 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6202 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6203 	},
6204 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6205 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6206 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6207 	},
6208 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6209 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6210 	  0, 0
6211 	},
6212 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6213 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6214 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6215 	},
6216 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6217 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6218 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6219 	},
6220 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6221 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6222 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6223 	},
6224 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6225 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6226 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6227 	},
6228 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6229 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6230 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6231 	},
6232 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6233 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6234 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6235 	},
6236 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6237 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6238 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6239 	},
6240 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6241 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6242 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6243 	},
6244 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6245 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6246 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6247 	},
6248 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6249 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6250 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6251 	},
6252 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6253 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6254 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6255 	},
6256 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6257 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6258 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6259 	},
6260 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6261 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6262 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6263 	},
6264 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6265 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6266 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6267 	},
6268 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6270 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6271 	},
6272 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6274 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6275 	},
6276 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6278 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6279 	},
6280 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6281 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6282 	  0, 0
6283 	},
6284 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6285 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6286 	  0, 0
6287 	},
6288 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6289 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6290 	  0, 0
6291 	},
6292 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6293 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6294 	  0, 0
6295 	},
6296 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6297 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6298 	  0, 0
6299 	},
6300 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6301 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6302 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6303 	},
6304 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6305 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6306 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6307 	},
6308 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6309 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6310 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6311 	},
6312 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6313 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6314 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6315 	},
6316 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6317 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6318 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6319 	},
6320 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6321 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6322 	  0, 0
6323 	},
6324 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6325 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6326 	  0, 0
6327 	},
6328 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6329 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6330 	  0, 0
6331 	},
6332 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6333 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6334 	  0, 0
6335 	},
6336 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6337 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6338 	  0, 0
6339 	},
6340 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6341 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6342 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6343 	},
6344 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6345 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6346 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6347 	},
6348 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6350 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6351 	},
6352 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6354 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6355 	},
6356 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6358 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6359 	},
6360 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6361 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6362 	  0, 0
6363 	},
6364 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6365 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6366 	  0, 0
6367 	},
6368 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6369 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6370 	  0, 0
6371 	},
6372 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6373 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6374 	  0, 0
6375 	},
6376 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6377 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6378 	  0, 0
6379 	},
6380 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6381 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6382 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6383 	},
6384 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6385 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6386 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6387 	},
6388 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6389 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6390 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6391 	},
6392 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6393 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6394 	  0, 0
6395 	},
6396 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6397 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6398 	  0, 0
6399 	},
6400 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6401 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6402 	  0, 0
6403 	},
6404 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6405 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6406 	  0, 0
6407 	},
6408 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6409 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6410 	  0, 0
6411 	},
6412 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6413 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6414 	  0, 0
6415 	}
6416 };
6417 
6418 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6419 				     void *inject_if)
6420 {
6421 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6422 	int ret;
6423 	struct ta_ras_trigger_error_input block_info = { 0 };
6424 
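	/* validate the requested sub-block and error type against the driver
	 * tables, then hand the translated request to the RAS TA via the PSP.
	 */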
6425 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6426 		return -EINVAL;
6427 
6428 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6429 		return -EINVAL;
6430 
6431 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6432 		return -EPERM;
6433 
6434 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6435 	      info->head.type)) {
6436 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6437 			ras_gfx_subblocks[info->head.sub_block_index].name,
6438 			info->head.type);
6439 		return -EPERM;
6440 	}
6441 
6442 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6443 	      info->head.type)) {
6444 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6445 			ras_gfx_subblocks[info->head.sub_block_index].name,
6446 			info->head.type);
6447 		return -EPERM;
6448 	}
6449 
6450 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6451 	block_info.sub_block_index =
6452 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6453 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6454 	block_info.address = info->address;
6455 	block_info.value = info->value;
6456 
6457 	mutex_lock(&adev->grbm_idx_mutex);
6458 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6459 	mutex_unlock(&adev->grbm_idx_mutex);
6460 
6461 	return ret;
6462 }
6463 
6464 static const char *vml2_mems[] = {
6465 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6466 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6467 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6468 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6469 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6470 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6471 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6472 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6473 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6474 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6475 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6476 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6477 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6478 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6479 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6480 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6481 };
6482 
6483 static const char *vml2_walker_mems[] = {
6484 	"UTC_VML2_CACHE_PDE0_MEM0",
6485 	"UTC_VML2_CACHE_PDE0_MEM1",
6486 	"UTC_VML2_CACHE_PDE1_MEM0",
6487 	"UTC_VML2_CACHE_PDE1_MEM1",
6488 	"UTC_VML2_CACHE_PDE2_MEM0",
6489 	"UTC_VML2_CACHE_PDE2_MEM1",
6490 	"UTC_VML2_RDIF_LOG_FIFO",
6491 };
6492 
6493 static const char *atc_l2_cache_2m_mems[] = {
6494 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6495 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6496 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6497 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6498 };
6499 
6500 static const char *atc_l2_cache_4k_mems[] = {
6501 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6502 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6503 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6504 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6505 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6506 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6507 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6508 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6509 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6510 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6511 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6512 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6513 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6514 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6515 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6516 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6517 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6518 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6519 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6520 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6521 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6522 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6523 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6524 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6525 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6526 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6527 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6528 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6529 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6530 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6531 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6532 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6533 };
6534 
6535 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6536 					 struct ras_err_data *err_data)
6537 {
6538 	uint32_t i, data;
6539 	uint32_t sec_count, ded_count;
6540 
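	/* clear the UTC/ATC L2 EDC counters before sampling each memory instance */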
6541 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6542 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6543 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6544 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6545 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6546 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6547 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6548 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6549 
6550 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6551 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6552 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6553 
6554 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6555 		if (sec_count) {
6556 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6557 				"SEC %d\n", i, vml2_mems[i], sec_count);
6558 			err_data->ce_count += sec_count;
6559 		}
6560 
6561 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6562 		if (ded_count) {
6563 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6564 				"DED %d\n", i, vml2_mems[i], ded_count);
6565 			err_data->ue_count += ded_count;
6566 		}
6567 	}
6568 
6569 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6570 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6571 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6572 
6573 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6574 						SEC_COUNT);
6575 		if (sec_count) {
6576 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6577 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6578 			err_data->ce_count += sec_count;
6579 		}
6580 
6581 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6582 						DED_COUNT);
6583 		if (ded_count) {
6584 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6585 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6586 			err_data->ue_count += ded_count;
6587 		}
6588 	}
6589 
6590 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6591 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6592 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6593 
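		/* the SEC count sits in bits [14:13] of this EDC count register */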
6594 		sec_count = (data & 0x00006000L) >> 0xd;
6595 		if (sec_count) {
6596 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6597 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6598 				sec_count);
6599 			err_data->ce_count += sec_count;
6600 		}
6601 	}
6602 
6603 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6604 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6605 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6606 
6607 		sec_count = (data & 0x00006000L) >> 0xd;
6608 		if (sec_count) {
6609 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6610 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6611 				sec_count);
6612 			err_data->ce_count += sec_count;
6613 		}
6614 
6615 		ded_count = (data & 0x00018000L) >> 0xf;
6616 		if (ded_count) {
6617 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6618 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6619 				ded_count);
6620 			err_data->ue_count += ded_count;
6621 		}
6622 	}
6623 
6624 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6625 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6626 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6627 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6628 
6629 	return 0;
6630 }
6631 
6632 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6633 	const struct soc15_reg_entry *reg,
6634 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6635 	uint32_t *sec_count, uint32_t *ded_count)
6636 {
6637 	uint32_t i;
6638 	uint32_t sec_cnt, ded_cnt;
6639 
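	/* walk the RAS field table and accumulate the SEC/DED counts of every
	 * field that belongs to this register instance.
	 */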
6640 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6641 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6642 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6643 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6644 			continue;
6645 
6646 		sec_cnt = (value &
6647 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6648 				gfx_v9_0_ras_fields[i].sec_count_shift;
6649 		if (sec_cnt) {
6650 			dev_info(adev->dev, "GFX SubBlock %s, "
6651 				"Instance[%d][%d], SEC %d\n",
6652 				gfx_v9_0_ras_fields[i].name,
6653 				se_id, inst_id,
6654 				sec_cnt);
6655 			*sec_count += sec_cnt;
6656 		}
6657 
6658 		ded_cnt = (value &
6659 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6660 				gfx_v9_0_ras_fields[i].ded_count_shift;
6661 		if (ded_cnt) {
6662 			dev_info(adev->dev, "GFX SubBlock %s, "
6663 				"Instance[%d][%d], DED %d\n",
6664 				gfx_v9_0_ras_fields[i].name,
6665 				se_id, inst_id,
6666 				ded_cnt);
6667 			*ded_count += ded_cnt;
6668 		}
6669 	}
6670 
6671 	return 0;
6672 }
6673 
6674 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6675 {
6676 	int i, j, k;
6677 
6678 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6679 		return;
6680 
6681 	/* read back registers to clear the counters */
6682 	mutex_lock(&adev->grbm_idx_mutex);
6683 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6684 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6685 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6686 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6687 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6688 			}
6689 		}
6690 	}
6691 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6692 	mutex_unlock(&adev->grbm_idx_mutex);
6693 
6694 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6695 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6696 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6697 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6698 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6699 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6700 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6701 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6702 
6703 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6704 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6705 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6706 	}
6707 
6708 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6709 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6710 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6711 	}
6712 
6713 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6714 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6715 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6716 	}
6717 
6718 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6719 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6720 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6721 	}
6722 
6723 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6724 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6725 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6726 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6727 }
6728 
6729 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6730 					  void *ras_error_status)
6731 {
6732 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6733 	uint32_t sec_count = 0, ded_count = 0;
6734 	uint32_t i, j, k;
6735 	uint32_t reg_value;
6736 
6737 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6738 		return;
6739 
6740 	err_data->ue_count = 0;
6741 	err_data->ce_count = 0;
6742 
6743 	mutex_lock(&adev->grbm_idx_mutex);
6744 
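	/* read every EDC counter register for each SE/instance and fold the
	 * per-field counts into the running totals.
	 */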
6745 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6746 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6747 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6748 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6749 				reg_value =
6750 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6751 				if (reg_value)
6752 					gfx_v9_0_ras_error_count(adev,
6753 						&gfx_v9_0_edc_counter_regs[i],
6754 						j, k, reg_value,
6755 						&sec_count, &ded_count);
6756 			}
6757 		}
6758 	}
6759 
6760 	err_data->ce_count += sec_count;
6761 	err_data->ue_count += ded_count;
6762 
6763 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6764 	mutex_unlock(&adev->grbm_idx_mutex);
6765 
6766 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6767 }
6768 
6769 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6770 {
6771 	const unsigned int cp_coher_cntl =
6772 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6773 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6774 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6775 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6776 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6777 
6778 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6779 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6780 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6781 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6782 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6783 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6784 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6785 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6786 }
6787 
6788 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6789 					uint32_t pipe, bool enable)
6790 {
6791 	struct amdgpu_device *adev = ring->adev;
6792 	uint32_t val;
6793 	uint32_t wcl_cs_reg;
6794 
6795 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
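	/* 0x1 throttles this CS pipe to the minimum wave allocation; the
	 * default value restores the full allocation.
	 */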
6796 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6797 
6798 	switch (pipe) {
6799 	case 0:
6800 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6801 		break;
6802 	case 1:
6803 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6804 		break;
6805 	case 2:
6806 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6807 		break;
6808 	case 3:
6809 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6810 		break;
6811 	default:
6812 		DRM_DEBUG("invalid pipe %d\n", pipe);
6813 		return;
6814 	}
6815 
6816 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6817 }
6818 
6819 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6820 {
6821 	struct amdgpu_device *adev = ring->adev;
6822 	uint32_t val;
6823 	int i;
6824 
6825 
6826 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
6827 	 * the number of gfx waves. Setting it to 0x1f (the low 5 bits) ensures gfx
6828 	 * only gets around 25% of the GPU resources.
6829 	 */
6830 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6831 	amdgpu_ring_emit_wreg(ring,
6832 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6833 			      val);
6834 
6835 	/* Restrict waves for normal/low priority compute queues as well
6836 	 * to get best QoS for high priority compute jobs.
6837 	 *
6838 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6839 	 */
6840 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6841 		if (i != ring->pipe)
6842 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6843 
6844 	}
6845 }
6846 
6847 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6848 	.name = "gfx_v9_0",
6849 	.early_init = gfx_v9_0_early_init,
6850 	.late_init = gfx_v9_0_late_init,
6851 	.sw_init = gfx_v9_0_sw_init,
6852 	.sw_fini = gfx_v9_0_sw_fini,
6853 	.hw_init = gfx_v9_0_hw_init,
6854 	.hw_fini = gfx_v9_0_hw_fini,
6855 	.suspend = gfx_v9_0_suspend,
6856 	.resume = gfx_v9_0_resume,
6857 	.is_idle = gfx_v9_0_is_idle,
6858 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6859 	.soft_reset = gfx_v9_0_soft_reset,
6860 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6861 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6862 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6863 };
6864 
6865 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6866 	.type = AMDGPU_RING_TYPE_GFX,
6867 	.align_mask = 0xff,
6868 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6869 	.support_64bit_ptrs = true,
6870 	.secure_submission_supported = true,
6871 	.vmhub = AMDGPU_GFXHUB_0,
6872 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6873 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6874 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6875 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6876 		5 +  /* COND_EXEC */
6877 		7 +  /* PIPELINE_SYNC */
6878 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6879 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6880 		2 + /* VM_FLUSH */
6881 		8 +  /* FENCE for VM_FLUSH */
6882 		20 + /* GDS switch */
6883 		4 + /* double SWITCH_BUFFER,
6884 		       the first COND_EXEC jumps to the place just
6885 		       prior to this double SWITCH_BUFFER */
6886 		5 + /* COND_EXEC */
6887 		7 +	 /*	HDP_flush */
6888 		4 +	 /*	VGT_flush */
6889 		14 + /*	CE_META */
6890 		31 + /*	DE_META */
6891 		3 + /* CNTX_CTRL */
6892 		5 + /* HDP_INVL */
6893 		8 + 8 + /* FENCE x2 */
6894 		2 + /* SWITCH_BUFFER */
6895 		7, /* gfx_v9_0_emit_mem_sync */
6896 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6897 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6898 	.emit_fence = gfx_v9_0_ring_emit_fence,
6899 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6900 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6901 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6902 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6903 	.test_ring = gfx_v9_0_ring_test_ring,
6904 	.test_ib = gfx_v9_0_ring_test_ib,
6905 	.insert_nop = amdgpu_ring_insert_nop,
6906 	.pad_ib = amdgpu_ring_generic_pad_ib,
6907 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6908 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6909 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6910 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6911 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6912 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6913 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6914 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6915 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6916 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6917 };
6918 
6919 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6920 	.type = AMDGPU_RING_TYPE_COMPUTE,
6921 	.align_mask = 0xff,
6922 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6923 	.support_64bit_ptrs = true,
6924 	.vmhub = AMDGPU_GFXHUB_0,
6925 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6926 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6927 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6928 	.emit_frame_size =
6929 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6930 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6931 		5 + /* hdp invalidate */
6932 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6933 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6934 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6935 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6936 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6937 		7 + /* gfx_v9_0_emit_mem_sync */
6938 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6939 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6940 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6941 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6942 	.emit_fence = gfx_v9_0_ring_emit_fence,
6943 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6944 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6945 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6946 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6947 	.test_ring = gfx_v9_0_ring_test_ring,
6948 	.test_ib = gfx_v9_0_ring_test_ib,
6949 	.insert_nop = amdgpu_ring_insert_nop,
6950 	.pad_ib = amdgpu_ring_generic_pad_ib,
6951 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6952 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6953 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6954 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6955 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6956 };
6957 
6958 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6959 	.type = AMDGPU_RING_TYPE_KIQ,
6960 	.align_mask = 0xff,
6961 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6962 	.support_64bit_ptrs = true,
6963 	.vmhub = AMDGPU_GFXHUB_0,
6964 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6965 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6966 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6967 	.emit_frame_size =
6968 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6969 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6970 		5 + /* hdp invalidate */
6971 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6972 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6973 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6974 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6975 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6976 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6977 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6978 	.test_ring = gfx_v9_0_ring_test_ring,
6979 	.insert_nop = amdgpu_ring_insert_nop,
6980 	.pad_ib = amdgpu_ring_generic_pad_ib,
6981 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6982 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6983 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6984 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6985 };
6986 
6987 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6988 {
6989 	int i;
6990 
6991 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6992 
6993 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6994 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6995 
6996 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6997 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6998 }
6999 
7000 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7001 	.set = gfx_v9_0_set_eop_interrupt_state,
7002 	.process = gfx_v9_0_eop_irq,
7003 };
7004 
7005 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7006 	.set = gfx_v9_0_set_priv_reg_fault_state,
7007 	.process = gfx_v9_0_priv_reg_irq,
7008 };
7009 
7010 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7011 	.set = gfx_v9_0_set_priv_inst_fault_state,
7012 	.process = gfx_v9_0_priv_inst_irq,
7013 };
7014 
7015 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7016 	.set = gfx_v9_0_set_cp_ecc_error_state,
7017 	.process = amdgpu_gfx_cp_ecc_error_irq,
7018 };
7019 
7020 
7021 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7022 {
7023 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7024 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7025 
7026 	adev->gfx.priv_reg_irq.num_types = 1;
7027 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7028 
7029 	adev->gfx.priv_inst_irq.num_types = 1;
7030 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7031 
7032 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7033 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7034 }
7035 
7036 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7037 {
7038 	switch (adev->ip_versions[GC_HWIP][0]) {
7039 	case IP_VERSION(9, 0, 1):
7040 	case IP_VERSION(9, 2, 1):
7041 	case IP_VERSION(9, 4, 0):
7042 	case IP_VERSION(9, 2, 2):
7043 	case IP_VERSION(9, 1, 0):
7044 	case IP_VERSION(9, 4, 1):
7045 	case IP_VERSION(9, 3, 0):
7046 	case IP_VERSION(9, 4, 2):
7047 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7048 		break;
7049 	default:
7050 		break;
7051 	}
7052 }
7053 
7054 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7055 {
7056 	/* init asic gds info */
7057 	switch (adev->ip_versions[GC_HWIP][0]) {
7058 	case IP_VERSION(9, 0, 1):
7059 	case IP_VERSION(9, 2, 1):
7060 	case IP_VERSION(9, 4, 0):
7061 		adev->gds.gds_size = 0x10000;
7062 		break;
7063 	case IP_VERSION(9, 2, 2):
7064 	case IP_VERSION(9, 1, 0):
7065 	case IP_VERSION(9, 4, 1):
7066 		adev->gds.gds_size = 0x1000;
7067 		break;
7068 	case IP_VERSION(9, 4, 2):
7069 		/* Aldebaran removed all of the GDS internal memory;
7070 		 * only GWS opcodes (e.g. barrier and semaphore) are
7071 		 * supported in the kernel. */
7072 		adev->gds.gds_size = 0;
7073 		break;
7074 	default:
7075 		adev->gds.gds_size = 0x10000;
7076 		break;
7077 	}
7078 
7079 	switch (adev->ip_versions[GC_HWIP][0]) {
7080 	case IP_VERSION(9, 0, 1):
7081 	case IP_VERSION(9, 4, 0):
7082 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7083 		break;
7084 	case IP_VERSION(9, 2, 1):
7085 		adev->gds.gds_compute_max_wave_id = 0x27f;
7086 		break;
7087 	case IP_VERSION(9, 2, 2):
7088 	case IP_VERSION(9, 1, 0):
7089 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7090 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7091 		else
7092 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7093 		break;
7094 	case IP_VERSION(9, 4, 1):
7095 		adev->gds.gds_compute_max_wave_id = 0xfff;
7096 		break;
7097 	case IP_VERSION(9, 4, 2):
7098 		/* deprecated for Aldebaran, no usage at all */
7099 		adev->gds.gds_compute_max_wave_id = 0;
7100 		break;
7101 	default:
7102 		/* this really depends on the chip */
7103 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7104 		break;
7105 	}
7106 
7107 	adev->gds.gws_size = 64;
7108 	adev->gds.oa_size = 16;
7109 }
7110 
7111 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7112 						 u32 bitmap)
7113 {
7114 	u32 data;
7115 
7116 	if (!bitmap)
7117 		return;
7118 
7119 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7120 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7121 
7122 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7123 }
7124 
7125 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7126 {
7127 	u32 data, mask;
7128 
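	/* merge the hardware-fused and user-disabled CU masks, then invert to
	 * get the CUs that are active within this SH.
	 */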
7129 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7130 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7131 
7132 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7133 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7134 
7135 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7136 
7137 	return (~data) & mask;
7138 }
7139 
7140 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7141 				 struct amdgpu_cu_info *cu_info)
7142 {
7143 	int i, j, k, counter, active_cu_number = 0;
7144 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7145 	unsigned disable_masks[4 * 4];
7146 
7147 	if (!adev || !cu_info)
7148 		return -EINVAL;
7149 
7150 	/*
7151 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7152 	 */
7153 	if (adev->gfx.config.max_shader_engines *
7154 		adev->gfx.config.max_sh_per_se > 16)
7155 		return -EINVAL;
7156 
7157 	amdgpu_gfx_parse_disable_cu(disable_masks,
7158 				    adev->gfx.config.max_shader_engines,
7159 				    adev->gfx.config.max_sh_per_se);
7160 
7161 	mutex_lock(&adev->grbm_idx_mutex);
7162 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7163 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7164 			mask = 1;
7165 			ao_bitmap = 0;
7166 			counter = 0;
7167 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7168 			gfx_v9_0_set_user_cu_inactive_bitmap(
7169 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7170 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7171 
7172 			/*
7173 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7174 			 * a 4x4 array, which is usually suitable for Vega
7175 			 * ASICs, which have a 4*2 SE/SH layout.
7176 			 * But for Arcturus the SE/SH layout changed to 8*1.
7177 			 * To minimize the impact, we make it compatible
7178 			 * with the current bitmap array as below:
7179 			 *    SE4,SH0 --> bitmap[0][1]
7180 			 *    SE5,SH0 --> bitmap[1][1]
7181 			 *    SE6,SH0 --> bitmap[2][1]
7182 			 *    SE7,SH0 --> bitmap[3][1]
7183 			 */
7184 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7185 
7186 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7187 				if (bitmap & mask) {
7188 					if (counter < adev->gfx.config.max_cu_per_sh)
7189 						ao_bitmap |= mask;
7190 					counter++;
7191 				}
7192 				mask <<= 1;
7193 			}
7194 			active_cu_number += counter;
7195 			if (i < 2 && j < 2)
7196 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7197 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7198 		}
7199 	}
7200 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7201 	mutex_unlock(&adev->grbm_idx_mutex);
7202 
7203 	cu_info->number = active_cu_number;
7204 	cu_info->ao_cu_mask = ao_cu_mask;
7205 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7206 
7207 	return 0;
7208 }
7209 
7210 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7211 {
7212 	.type = AMD_IP_BLOCK_TYPE_GFX,
7213 	.major = 9,
7214 	.minor = 0,
7215 	.rev = 0,
7216 	.funcs = &gfx_v9_0_ip_funcs,
7217 };
7218