xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 26b32974)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54 
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58 
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133 
134 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
144 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
146 
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151 
152 enum ta_ras_gfx_subblock {
153 	/*CPC*/
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
155 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPC_UCODE,
157 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
158 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
159 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
160 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
161 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
162 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
163 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
164 	/* CPF*/
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
166 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
167 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
168 	TA_RAS_BLOCK__GFX_CPF_TAG,
169 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
170 	/* CPG*/
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
172 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
173 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
174 	TA_RAS_BLOCK__GFX_CPG_TAG,
175 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
176 	/* GDS*/
177 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
178 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
179 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
180 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
181 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
182 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
183 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
184 	/* SPI*/
185 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
186 	/* SQ*/
187 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
189 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
190 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
191 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
192 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
193 	/* SQC (3 ranges)*/
194 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
195 	/* SQC range 0*/
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
197 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
198 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
203 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
204 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
205 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
206 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
207 	/* SQC range 1*/
208 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
210 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
218 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
220 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
221 	/* SQC range 2*/
222 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
224 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
225 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
230 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
231 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
232 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
233 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
234 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
235 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
236 	/* TA*/
237 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
239 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
240 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
241 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
242 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
243 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
244 	/* TCA*/
245 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
246 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
247 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
248 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
249 	/* TCC (5 sub-ranges)*/
250 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
251 	/* TCC range 0*/
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
255 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
257 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
258 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
259 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
260 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
261 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
262 	/* TCC range 1*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
264 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
265 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
267 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
268 	/* TCC range 2*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
270 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
271 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
272 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
273 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
274 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
275 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
277 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
278 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
279 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
280 	/* TCC range 3*/
281 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
282 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
283 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
285 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
286 	/* TCC range 4*/
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
288 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
289 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
290 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
291 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
292 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
293 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
294 	/* TCI*/
295 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
296 	/* TCP*/
297 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
298 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
300 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
301 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
302 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
303 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
304 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
305 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
306 	/* TD*/
307 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
308 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
309 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
310 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
311 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
312 	/* EA (3 sub-ranges)*/
313 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
314 	/* EA range 0*/
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
316 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
317 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
319 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
320 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
321 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
325 	/* EA range 1*/
326 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
327 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
328 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
330 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
331 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
332 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
333 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
334 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
335 	/* EA range 2*/
336 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
337 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
338 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
339 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
340 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
341 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
342 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
343 	/* UTC VM L2 bank*/
344 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
345 	/* UTC VM walker*/
346 	TA_RAS_BLOCK__UTC_VML2_WALKER,
347 	/* UTC ATC L2 2MB cache*/
348 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
349 	/* UTC ATC L2 4KB cache*/
350 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
351 	TA_RAS_BLOCK__GFX_MAX
352 };
353 
354 struct ras_gfx_subblock {
355 	unsigned char *name;
356 	int ta_subblock;
357 	int hw_supported_error_type;
358 	int sw_supported_error_type;
359 };
360 
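/*
 * The four "hw" flags (a-d) and four "sw" flags (e-h) passed to this macro
 * are packed into the hw_/sw_supported_error_type bitmasks; they appear to
 * follow the AMDGPU_RAS_ERROR__* bit layout (parity, single-bit correctable,
 * multi-bit uncorrectable, poison) checked by the RAS error-inject path.
 */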
361 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
362 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
363 		#subblock,                                                     \
364 		TA_RAS_BLOCK__##subblock,                                      \
365 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
366 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
367 	}
368 
369 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
389 			     0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     1),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
430 			     0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
442 			     0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
456 			     1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
458 			     1),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
462 			     0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
475 			     0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
480 			     0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
482 			     0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
517 };
518 
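/*
 * "Golden" register settings, applied once during init through
 * soc15_program_register_sequence().  Each SOC15_REG_GOLDEN_VALUE(ip, inst,
 * reg, mask, value) entry is, roughly, a read-modify-write:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~mask;
 *	tmp |= (value & mask);
 *	WREG32(reg, tmp);
 *
 * (a plain write when the mask is 0xffffffff).  The tables below hold the
 * per-ASIC values for the GC 9.x variants handled by this file.
 */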
519 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
520 {
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
541 };
542 
543 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
544 {
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
563 };
564 
565 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
566 {
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
578 };
579 
580 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
581 {
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
606 };
607 
608 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
609 {
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
617 };
618 
619 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
620 {
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
656 };
657 
658 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
659 {
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
663 };
664 
665 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
666 {
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
683 };
684 
685 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
686 {
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
700 };
701 
702 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
703 {
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
715 };
716 
717 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
718 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
719 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
720 };
721 
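/*
 * Offsets of the RLC save/restore-manager INDEX_CNTL address/data registers,
 * expressed relative to entry 0 so all eight register pairs can be
 * programmed in a loop.
 */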
722 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
723 {
724 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
731 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
732 };
733 
734 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
735 {
736 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
743 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
744 };
745 
746 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
747 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
748 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
749 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
750 
751 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
752 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
753 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
754 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
755 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
756 				struct amdgpu_cu_info *cu_info);
757 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
758 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
759 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
760 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
761 					  void *ras_error_status);
762 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
763 				     void *inject_if);
764 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
765 
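/*
 * KIQ (kernel interface queue) packet builders.  These emit the PM4 packets
 * the driver submits on the KIQ ring so the CP can map/unmap compute queues,
 * report queue status and invalidate TLBs on the driver's behalf.
 */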
766 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
767 				uint64_t queue_mask)
768 {
769 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
770 	amdgpu_ring_write(kiq_ring,
771 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
772 		/* vmid_mask:0, queue_type:0 (KIQ) */
773 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
774 	amdgpu_ring_write(kiq_ring,
775 			lower_32_bits(queue_mask));	/* queue mask lo */
776 	amdgpu_ring_write(kiq_ring,
777 			upper_32_bits(queue_mask));	/* queue mask hi */
778 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
779 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
780 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
781 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
782 }
783 
784 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
785 				 struct amdgpu_ring *ring)
786 {
787 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
788 	uint64_t wptr_addr = ring->wptr_gpu_addr;
789 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
790 
791 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
792 	/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
793 	amdgpu_ring_write(kiq_ring,
794 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
795 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
796 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
797 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
798 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
799 			 /* queue_type: normal compute queue */
800 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
801 			 /* alloc format: all_on_one_pipe */
802 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
803 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
804 			 /* num_queues: must be 1 */
805 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
806 	amdgpu_ring_write(kiq_ring,
807 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
808 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
809 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
810 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
811 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
812 }
813 
814 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
815 				   struct amdgpu_ring *ring,
816 				   enum amdgpu_unmap_queues_action action,
817 				   u64 gpu_addr, u64 seq)
818 {
819 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
820 
821 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
822 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
823 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
824 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
825 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
826 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
827 	amdgpu_ring_write(kiq_ring,
828 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
829 
830 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
831 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
832 		amdgpu_ring_write(kiq_ring, 0);
833 		amdgpu_ring_write(kiq_ring, 0);
834 
835 	} else {
836 		amdgpu_ring_write(kiq_ring, 0);
837 		amdgpu_ring_write(kiq_ring, 0);
838 		amdgpu_ring_write(kiq_ring, 0);
839 	}
840 }
841 
842 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
843 				   struct amdgpu_ring *ring,
844 				   u64 addr,
845 				   u64 seq)
846 {
847 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
848 
849 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
850 	amdgpu_ring_write(kiq_ring,
851 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
852 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
853 			  PACKET3_QUERY_STATUS_COMMAND(2));
854 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
855 	amdgpu_ring_write(kiq_ring,
856 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
857 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
858 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
859 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
860 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
861 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
862 }
863 
864 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
865 				uint16_t pasid, uint32_t flush_type,
866 				bool all_hub)
867 {
868 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
869 	amdgpu_ring_write(kiq_ring,
870 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
871 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
872 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
873 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
874 }
875 
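/*
 * The *_size fields give the length, in dwords, of each packet emitted by
 * the helpers above; callers use them to reserve KIQ ring space before
 * emitting the corresponding packet.
 */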
876 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
877 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
878 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
879 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
880 	.kiq_query_status = gfx_v9_0_kiq_query_status,
881 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
882 	.set_resources_size = 8,
883 	.map_queues_size = 7,
884 	.unmap_queues_size = 6,
885 	.query_status_size = 7,
886 	.invalidate_tlbs_size = 2,
887 };
888 
889 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
890 {
891 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
892 }
893 
894 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
895 {
896 	switch (adev->ip_versions[GC_HWIP][0]) {
897 	case IP_VERSION(9, 0, 1):
898 		soc15_program_register_sequence(adev,
899 						golden_settings_gc_9_0,
900 						ARRAY_SIZE(golden_settings_gc_9_0));
901 		soc15_program_register_sequence(adev,
902 						golden_settings_gc_9_0_vg10,
903 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
904 		break;
905 	case IP_VERSION(9, 2, 1):
906 		soc15_program_register_sequence(adev,
907 						golden_settings_gc_9_2_1,
908 						ARRAY_SIZE(golden_settings_gc_9_2_1));
909 		soc15_program_register_sequence(adev,
910 						golden_settings_gc_9_2_1_vg12,
911 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
912 		break;
913 	case IP_VERSION(9, 4, 0):
914 		soc15_program_register_sequence(adev,
915 						golden_settings_gc_9_0,
916 						ARRAY_SIZE(golden_settings_gc_9_0));
917 		soc15_program_register_sequence(adev,
918 						golden_settings_gc_9_0_vg20,
919 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
920 		break;
921 	case IP_VERSION(9, 4, 1):
922 		soc15_program_register_sequence(adev,
923 						golden_settings_gc_9_4_1_arct,
924 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
925 		break;
926 	case IP_VERSION(9, 2, 2):
927 	case IP_VERSION(9, 1, 0):
928 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
929 						ARRAY_SIZE(golden_settings_gc_9_1));
930 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
931 			soc15_program_register_sequence(adev,
932 							golden_settings_gc_9_1_rv2,
933 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
934 		else
935 			soc15_program_register_sequence(adev,
936 							golden_settings_gc_9_1_rv1,
937 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
938 		break;
939 	case IP_VERSION(9, 3, 0):
940 		soc15_program_register_sequence(adev,
941 						golden_settings_gc_9_1_rn,
942 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
943 		return; /* Renoir does not need the common golden settings */
944 	case IP_VERSION(9, 4, 2):
945 		gfx_v9_4_2_init_golden_registers(adev,
946 						 adev->smuio.funcs->get_die_id(adev));
947 		break;
948 	default:
949 		break;
950 	}
951 
952 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
953 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
954 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
955 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
956 }
957 
958 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
959 				       bool wc, uint32_t reg, uint32_t val)
960 {
961 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
962 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
963 				WRITE_DATA_DST_SEL(0) |
964 				(wc ? WR_CONFIRM : 0));
965 	amdgpu_ring_write(ring, reg);
966 	amdgpu_ring_write(ring, 0);
967 	amdgpu_ring_write(ring, val);
968 }
969 
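/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space == 0) or a memory
 * location (mem_space == 1) until (value & mask) == ref, using the "equal"
 * compare function; the last dword is the poll interval.
 */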
970 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
971 				  int mem_space, int opt, uint32_t addr0,
972 				  uint32_t addr1, uint32_t ref, uint32_t mask,
973 				  uint32_t inv)
974 {
975 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
976 	amdgpu_ring_write(ring,
977 				 /* memory (1) or register (0) */
978 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
979 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
980 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
981 				 WAIT_REG_MEM_ENGINE(eng_sel)));
982 
983 	if (mem_space)
984 		BUG_ON(addr0 & 0x3); /* Dword align */
985 	amdgpu_ring_write(ring, addr0);
986 	amdgpu_ring_write(ring, addr1);
987 	amdgpu_ring_write(ring, ref);
988 	amdgpu_ring_write(ring, mask);
989 	amdgpu_ring_write(ring, inv); /* poll interval */
990 }
991 
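/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, ask the
 * CP to write 0xDEADBEEF to it through the ring, then poll until the value
 * shows up or the usec timeout expires.
 */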
992 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
993 {
994 	struct amdgpu_device *adev = ring->adev;
995 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
996 	uint32_t tmp = 0;
997 	unsigned i;
998 	int r;
999 
1000 	WREG32(scratch, 0xCAFEDEAD);
1001 	r = amdgpu_ring_alloc(ring, 3);
1002 	if (r)
1003 		return r;
1004 
1005 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1006 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1007 	amdgpu_ring_write(ring, 0xDEADBEEF);
1008 	amdgpu_ring_commit(ring);
1009 
1010 	for (i = 0; i < adev->usec_timeout; i++) {
1011 		tmp = RREG32(scratch);
1012 		if (tmp == 0xDEADBEEF)
1013 			break;
1014 		udelay(1);
1015 	}
1016 
1017 	if (i >= adev->usec_timeout)
1018 		r = -ETIMEDOUT;
1019 	return r;
1020 }
1021 
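/*
 * IB sanity test: submit a small indirect buffer whose WRITE_DATA packet
 * stores 0xDEADBEEF to a writeback slot, then wait on the fence and check
 * that the value landed.
 */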
1022 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1023 {
1024 	struct amdgpu_device *adev = ring->adev;
1025 	struct amdgpu_ib ib;
1026 	struct dma_fence *f = NULL;
1027 
1028 	unsigned index;
1029 	uint64_t gpu_addr;
1030 	uint32_t tmp;
1031 	long r;
1032 
1033 	r = amdgpu_device_wb_get(adev, &index);
1034 	if (r)
1035 		return r;
1036 
1037 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1038 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1039 	memset(&ib, 0, sizeof(ib));
1040 	r = amdgpu_ib_get(adev, NULL, 16,
1041 					AMDGPU_IB_POOL_DIRECT, &ib);
1042 	if (r)
1043 		goto err1;
1044 
1045 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1046 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1047 	ib.ptr[2] = lower_32_bits(gpu_addr);
1048 	ib.ptr[3] = upper_32_bits(gpu_addr);
1049 	ib.ptr[4] = 0xDEADBEEF;
1050 	ib.length_dw = 5;
1051 
1052 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1053 	if (r)
1054 		goto err2;
1055 
1056 	r = dma_fence_wait_timeout(f, false, timeout);
1057 	if (r == 0) {
1058 		r = -ETIMEDOUT;
1059 		goto err2;
1060 	} else if (r < 0) {
1061 		goto err2;
1062 	}
1063 
1064 	tmp = adev->wb.wb[index];
1065 	if (tmp == 0xDEADBEEF)
1066 		r = 0;
1067 	else
1068 		r = -EINVAL;
1069 
1070 err2:
1071 	amdgpu_ib_free(adev, &ib, NULL);
1072 	dma_fence_put(f);
1073 err1:
1074 	amdgpu_device_wb_free(adev, index);
1075 	return r;
1076 }
1077 
1078 
1079 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1080 {
1081 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1082 	amdgpu_ucode_release(&adev->gfx.me_fw);
1083 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1084 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1085 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1086 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1087 
1088 	kfree(adev->gfx.rlc.register_list_format);
1089 }
1090 
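/*
 * Check whether the ME/MEC firmware is new enough to support the combined
 * write-then-wait register operation used by the ring emit code; older
 * firmware falls back to separate write and wait packets.
 */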
1091 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1092 {
1093 	adev->gfx.me_fw_write_wait = false;
1094 	adev->gfx.mec_fw_write_wait = false;
1095 
1096 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1097 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1098 	    (adev->gfx.mec_feature_version < 46) ||
1099 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1100 	    (adev->gfx.pfp_feature_version < 46)))
1101 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1102 
1103 	switch (adev->ip_versions[GC_HWIP][0]) {
1104 	case IP_VERSION(9, 0, 1):
1105 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1106 		    (adev->gfx.me_feature_version >= 42) &&
1107 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1108 		    (adev->gfx.pfp_feature_version >= 42))
1109 			adev->gfx.me_fw_write_wait = true;
1110 
1111 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1112 		    (adev->gfx.mec_feature_version >= 42))
1113 			adev->gfx.mec_fw_write_wait = true;
1114 		break;
1115 	case IP_VERSION(9, 2, 1):
1116 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1117 		    (adev->gfx.me_feature_version >= 44) &&
1118 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1119 		    (adev->gfx.pfp_feature_version >= 44))
1120 			adev->gfx.me_fw_write_wait = true;
1121 
1122 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1123 		    (adev->gfx.mec_feature_version >= 44))
1124 			adev->gfx.mec_fw_write_wait = true;
1125 		break;
1126 	case IP_VERSION(9, 4, 0):
1127 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1128 		    (adev->gfx.me_feature_version >= 44) &&
1129 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1130 		    (adev->gfx.pfp_feature_version >= 44))
1131 			adev->gfx.me_fw_write_wait = true;
1132 
1133 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1134 		    (adev->gfx.mec_feature_version >= 44))
1135 			adev->gfx.mec_fw_write_wait = true;
1136 		break;
1137 	case IP_VERSION(9, 1, 0):
1138 	case IP_VERSION(9, 2, 2):
1139 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1140 		    (adev->gfx.me_feature_version >= 42) &&
1141 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1142 		    (adev->gfx.pfp_feature_version >= 42))
1143 			adev->gfx.me_fw_write_wait = true;
1144 
1145 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1146 		    (adev->gfx.mec_feature_version >= 42))
1147 			adev->gfx.mec_fw_write_wait = true;
1148 		break;
1149 	default:
1150 		adev->gfx.me_fw_write_wait = true;
1151 		adev->gfx.mec_fw_write_wait = true;
1152 		break;
1153 	}
1154 }
1155 
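/*
 * Boards on which GFXOFF is known to be unstable, matched by PCI vendor,
 * device, subsystem IDs and revision; GFXOFF is force-disabled on these.
 */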
1156 struct amdgpu_gfxoff_quirk {
1157 	u16 chip_vendor;
1158 	u16 chip_device;
1159 	u16 subsys_vendor;
1160 	u16 subsys_device;
1161 	u8 revision;
1162 };
1163 
1164 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1165 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1166 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1167 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1168 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1169 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1170 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1171 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1172 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1173 	{ 0, 0, 0, 0, 0 },
1174 };
1175 
1176 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1177 {
1178 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1179 
1180 	while (p && p->chip_device != 0) {
1181 		if (pdev->vendor == p->chip_vendor &&
1182 		    pdev->device == p->chip_device &&
1183 		    pdev->subsystem_vendor == p->subsys_vendor &&
1184 		    pdev->subsystem_device == p->subsys_device &&
1185 		    pdev->revision == p->revision) {
1186 			return true;
1187 		}
1188 		++p;
1189 	}
1190 	return false;
1191 }
1192 
1193 static bool is_raven_kicker(struct amdgpu_device *adev)
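/* Newer ("kicker") Raven parts are identified by their SMU firmware version. */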
1194 {
1195 	if (adev->pm.fw_version >= 0x41e2b)
1196 		return true;
1197 	else
1198 		return false;
1199 }
1200 
1201 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1202 {
1203 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1204 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1205 	    (adev->gfx.me_feature_version >= 52))
1206 		return true;
1207 	else
1208 		return false;
1209 }
1210 
1211 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1212 {
1213 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1214 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1215 
1216 	switch (adev->ip_versions[GC_HWIP][0]) {
1217 	case IP_VERSION(9, 0, 1):
1218 	case IP_VERSION(9, 2, 1):
1219 	case IP_VERSION(9, 4, 0):
1220 		break;
1221 	case IP_VERSION(9, 2, 2):
1222 	case IP_VERSION(9, 1, 0):
1223 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1224 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1225 		    ((!is_raven_kicker(adev) &&
1226 		      adev->gfx.rlc_fw_version < 531) ||
1227 		     (adev->gfx.rlc_feature_version < 1) ||
1228 		     !adev->gfx.rlc.is_rlc_v2_1))
1229 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1230 
1231 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1232 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1233 				AMD_PG_SUPPORT_CP |
1234 				AMD_PG_SUPPORT_RLC_SMU_HS;
1235 		break;
1236 	case IP_VERSION(9, 3, 0):
1237 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1238 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1239 				AMD_PG_SUPPORT_CP |
1240 				AMD_PG_SUPPORT_RLC_SMU_HS;
1241 		break;
1242 	default:
1243 		break;
1244 	}
1245 }
1246 
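/*
 * Request the PFP, ME and CE firmware images and register them with the
 * ucode framework; on any failure all three are released again.
 */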
1247 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1248 					  char *chip_name)
1249 {
1250 	char fw_name[30];
1251 	int err;
1252 
1253 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1254 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1255 	if (err)
1256 		goto out;
1257 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1258 
1259 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1260 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1261 	if (err)
1262 		goto out;
1263 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1264 
1265 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1266 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1267 	if (err)
1268 		goto out;
1269 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1270 
1271 out:
1272 	if (err) {
1273 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1274 		amdgpu_ucode_release(&adev->gfx.me_fw);
1275 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1276 	}
1277 	return err;
1278 }
1279 
1280 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1281 				       char *chip_name)
1282 {
1283 	char fw_name[30];
1284 	int err;
1285 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1286 	uint16_t version_major;
1287 	uint16_t version_minor;
1288 	uint32_t smu_version;
1289 
1290 	/*
1291 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1292 	 * instead of picasso_rlc.bin.
1293 	 * Detection:
1294 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1295 	 *          or revision >= 0xD8 && revision <= 0xDF
1296 	 * otherwise it is PCO FP5
1297 	 */
1298 	if (!strcmp(chip_name, "picasso") &&
1299 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1300 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1301 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1302 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1303 		(smu_version >= 0x41e2b))
1304 		/*
1305 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1306 		 */
1307 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1308 	else
1309 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1310 	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1311 	if (err)
1312 		goto out;
1313 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1314 
1315 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1316 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1317 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1318 out:
1319 	if (err)
1320 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1321 
1322 	return err;
1323 }
1324 
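/*
 * GC 9.4.1, 9.4.2 and 9.3.0 do not load a separate MEC2 image; for those the
 * MEC1 firmware version is reported for MEC2 as well.  Every other GFX9 part
 * loads a dedicated mec2 binary.
 */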
1325 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1326 {
1327 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1328 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1329 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1330 		return false;
1331 
1332 	return true;
1333 }
1334 
1335 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1336 					      char *chip_name)
1337 {
1338 	char fw_name[30];
1339 	int err;
1340 
1341 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1342 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1343 	else
1344 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1345 
1346 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1347 	if (err)
1348 		goto out;
1349 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1350 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1351 
1352 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1353 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1354 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1355 		else
1356 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1357 
1358 		/* ignore failures to load */
1359 		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1360 		if (!err) {
1361 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1362 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1363 		} else {
1364 			err = 0;
1365 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1366 		}
1367 	} else {
1368 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1369 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1370 	}
1371 
1372 	gfx_v9_0_check_if_need_gfxoff(adev);
1373 	gfx_v9_0_check_fw_write_wait(adev);
1374 
1375 out:
1376 	if (err)
1377 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1378 	return err;
1379 }
1380 
1381 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1382 {
1383 	char ucode_prefix[30];
1384 	int r;
1385 
1386 	DRM_DEBUG("\n");
1387 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1388 
1389 	/* No CPG in Arcturus */
1390 	if (adev->gfx.num_gfx_rings) {
1391 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1392 		if (r)
1393 			return r;
1394 	}
1395 
1396 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1397 	if (r)
1398 		return r;
1399 
1400 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1401 	if (r)
1402 		return r;
1403 
1404 	return r;
1405 }
1406 
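/*
 * Return the clear-state buffer size in dwords: preamble begin/end, the
 * context-control packet, one SET_CONTEXT_REG packet per extent and the
 * final CLEAR_STATE packet.
 */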
1407 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1408 {
1409 	u32 count = 0;
1410 	const struct cs_section_def *sect = NULL;
1411 	const struct cs_extent_def *ext = NULL;
1412 
1413 	/* begin clear state */
1414 	count += 2;
1415 	/* context control state */
1416 	count += 3;
1417 
1418 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1419 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1420 			if (sect->id == SECT_CONTEXT)
1421 				count += 2 + ext->reg_count;
1422 			else
1423 				return 0;
1424 		}
1425 	}
1426 
1427 	/* end clear state */
1428 	count += 2;
1429 	/* clear state */
1430 	count += 2;
1431 
1432 	return count;
1433 }
1434 
1435 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1436 				    volatile u32 *buffer)
1437 {
1438 	u32 count = 0, i;
1439 	const struct cs_section_def *sect = NULL;
1440 	const struct cs_extent_def *ext = NULL;
1441 
1442 	if (adev->gfx.rlc.cs_data == NULL)
1443 		return;
1444 	if (buffer == NULL)
1445 		return;
1446 
1447 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1448 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1449 
1450 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1451 	buffer[count++] = cpu_to_le32(0x80000000);
1452 	buffer[count++] = cpu_to_le32(0x80000000);
1453 
1454 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1455 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1456 			if (sect->id == SECT_CONTEXT) {
1457 				buffer[count++] =
1458 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1459 				buffer[count++] = cpu_to_le32(ext->reg_index -
1460 						PACKET3_SET_CONTEXT_REG_START);
1461 				for (i = 0; i < ext->reg_count; i++)
1462 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1463 			} else {
1464 				return;
1465 			}
1466 		}
1467 	}
1468 
1469 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1470 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1471 
1472 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1473 	buffer[count++] = cpu_to_le32(0);
1474 }
1475 
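/*
 * Build the per-SE/SH bitmap of CUs that must stay powered on and program it
 * into RLC_LB_ALWAYS_ACTIVE_CU_MASK; the first two CUs are also written to
 * RLC_PG_ALWAYS_ON_CU_MASK.  APUs keep 4 CUs always on, GC 9.2.1 keeps 8 and
 * all other parts keep 12.
 */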
1476 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1477 {
1478 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1479 	uint32_t pg_always_on_cu_num = 2;
1480 	uint32_t always_on_cu_num;
1481 	uint32_t i, j, k;
1482 	uint32_t mask, cu_bitmap, counter;
1483 
1484 	if (adev->flags & AMD_IS_APU)
1485 		always_on_cu_num = 4;
1486 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1487 		always_on_cu_num = 8;
1488 	else
1489 		always_on_cu_num = 12;
1490 
1491 	mutex_lock(&adev->grbm_idx_mutex);
1492 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1493 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1494 			mask = 1;
1495 			cu_bitmap = 0;
1496 			counter = 0;
1497 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
1498 
1499 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1500 				if (cu_info->bitmap[i][j] & mask) {
1501 					if (counter == pg_always_on_cu_num)
1502 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1503 					if (counter < always_on_cu_num)
1504 						cu_bitmap |= mask;
1505 					else
1506 						break;
1507 					counter++;
1508 				}
1509 				mask <<= 1;
1510 			}
1511 
1512 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1513 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1514 		}
1515 	}
1516 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1517 	mutex_unlock(&adev->grbm_idx_mutex);
1518 }
1519 
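/*
 * Program the RLC load-balancing (LB) thresholds, counters and CU masks used
 * for LBPW on Raven; gfx_v9_4_init_lbpw() below provides the GC 9.4.0 variant
 * with its own tuned values.
 */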
1520 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1521 {
1522 	uint32_t data;
1523 
1524 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1525 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1526 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1527 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1528 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1529 
1530 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1531 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1532 
1533 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1534 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1535 
1536 	mutex_lock(&adev->grbm_idx_mutex);
1537 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1538 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1539 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1540 
1541 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1542 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1543 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1544 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1545 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1546 
1547 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1548 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1549 	data &= 0x0000FFFF;
1550 	data |= 0x00C00000;
1551 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1552 
1553 	/*
1554 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1555 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1556 	 */
1557 
1558 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1559 	 * but is used here for the RLC_LB_CNTL configuration */
1560 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1561 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1562 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1563 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1564 	mutex_unlock(&adev->grbm_idx_mutex);
1565 
1566 	gfx_v9_0_init_always_on_cu_mask(adev);
1567 }
1568 
1569 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1570 {
1571 	uint32_t data;
1572 
1573 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1574 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1575 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1576 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1577 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1578 
1579 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1580 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1581 
1582 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1583 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1584 
1585 	mutex_lock(&adev->grbm_idx_mutex);
1586 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1587 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1588 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1589 
1590 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1591 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1592 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1593 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1594 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1595 
1596 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1597 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1598 	data &= 0x0000FFFF;
1599 	data |= 0x00C00000;
1600 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1601 
1602 	/*
1603 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1604 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1605 	 */
1606 
1607 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1608 	 * but is used here for the RLC_LB_CNTL configuration */
1609 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1610 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1611 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1612 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1613 	mutex_unlock(&adev->grbm_idx_mutex);
1614 
1615 	gfx_v9_0_init_always_on_cu_mask(adev);
1616 }
1617 
1618 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1619 {
1620 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1621 }
1622 
1623 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1624 {
1625 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1626 		return 5;
1627 	else
1628 		return 4;
1629 }
1630 
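/*
 * Record the scratch, GRBM control/index and spare-interrupt register offsets
 * used by the RLCG indirect register access helpers, and mark the interface
 * as supported.
 */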
1631 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1632 {
1633 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1634 
1635 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1636 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1637 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1638 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1639 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1640 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1641 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1642 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1643 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1644 }
1645 
1646 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1647 {
1648 	const struct cs_section_def *cs_data;
1649 	int r;
1650 
1651 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1652 
1653 	cs_data = adev->gfx.rlc.cs_data;
1654 
1655 	if (cs_data) {
1656 		/* init clear state block */
1657 		r = amdgpu_gfx_rlc_init_csb(adev);
1658 		if (r)
1659 			return r;
1660 	}
1661 
1662 	if (adev->flags & AMD_IS_APU) {
1663 		/* TODO: double check the cp_table_size for RV */
1664 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1665 		r = amdgpu_gfx_rlc_init_cpt(adev);
1666 		if (r)
1667 			return r;
1668 	}
1669 
1670 	switch (adev->ip_versions[GC_HWIP][0]) {
1671 	case IP_VERSION(9, 2, 2):
1672 	case IP_VERSION(9, 1, 0):
1673 		gfx_v9_0_init_lbpw(adev);
1674 		break;
1675 	case IP_VERSION(9, 4, 0):
1676 		gfx_v9_4_init_lbpw(adev);
1677 		break;
1678 	default:
1679 		break;
1680 	}
1681 
1682 	/* init spm vmid with 0xf */
1683 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1684 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1685 
1686 	return 0;
1687 }
1688 
1689 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1690 {
1691 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1692 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1693 }
1694 
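/*
 * Allocate the HPD EOP buffer shared by all acquired compute queues and copy
 * the MEC firmware image into a GTT buffer object.
 */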
1695 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1696 {
1697 	int r;
1698 	u32 *hpd;
1699 	const __le32 *fw_data;
1700 	unsigned fw_size;
1701 	u32 *fw;
1702 	size_t mec_hpd_size;
1703 
1704 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1705 
1706 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1707 
1708 	/* take ownership of the relevant compute queues */
1709 	amdgpu_gfx_compute_queue_acquire(adev);
1710 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1711 	if (mec_hpd_size) {
1712 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1713 					      AMDGPU_GEM_DOMAIN_VRAM |
1714 					      AMDGPU_GEM_DOMAIN_GTT,
1715 					      &adev->gfx.mec.hpd_eop_obj,
1716 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1717 					      (void **)&hpd);
1718 		if (r) {
1719 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1720 			gfx_v9_0_mec_fini(adev);
1721 			return r;
1722 		}
1723 
1724 		memset(hpd, 0, mec_hpd_size);
1725 
1726 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1727 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1728 	}
1729 
1730 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1731 
1732 	fw_data = (const __le32 *)
1733 		(adev->gfx.mec_fw->data +
1734 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1735 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1736 
1737 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1738 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1739 				      &adev->gfx.mec.mec_fw_obj,
1740 				      &adev->gfx.mec.mec_fw_gpu_addr,
1741 				      (void **)&fw);
1742 	if (r) {
1743 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1744 		gfx_v9_0_mec_fini(adev);
1745 		return r;
1746 	}
1747 
1748 	memcpy(fw, fw_data, fw_size);
1749 
1750 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1751 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1752 
1753 	return 0;
1754 }
1755 
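/*
 * Read a single indirect SQ wave register for the given SIMD/wave through the
 * SQ_IND_INDEX/SQ_IND_DATA register pair.
 */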
1756 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1757 {
1758 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1759 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1760 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1761 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1762 		(SQ_IND_INDEX__FORCE_READ_MASK));
1763 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1764 }
1765 
1766 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1767 			   uint32_t wave, uint32_t thread,
1768 			   uint32_t regno, uint32_t num, uint32_t *out)
1769 {
1770 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1771 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1772 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1773 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1774 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1775 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1776 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1777 	while (num--)
1778 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1779 }
1780 
1781 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1782 {
1783 	/* type 1 wave data */
1784 	dst[(*no_fields)++] = 1;
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1790 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1799 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1800 }
1801 
1802 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1803 				     uint32_t wave, uint32_t start,
1804 				     uint32_t size, uint32_t *dst)
1805 {
1806 	wave_read_regs(
1807 		adev, simd, wave, 0,
1808 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1809 }
1810 
1811 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1812 				     uint32_t wave, uint32_t thread,
1813 				     uint32_t start, uint32_t size,
1814 				     uint32_t *dst)
1815 {
1816 	wave_read_regs(
1817 		adev, simd, wave, thread,
1818 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1819 }
1820 
1821 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1822 				  u32 me, u32 pipe, u32 q, u32 vm)
1823 {
1824 	soc15_grbm_select(adev, me, pipe, q, vm);
1825 }
1826 
1827 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1828 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1829 	.select_se_sh = &gfx_v9_0_select_se_sh,
1830 	.read_wave_data = &gfx_v9_0_read_wave_data,
1831 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1832 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1833 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1834 };
1835 
1836 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1837 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1838 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1839 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1840 };
1841 
1842 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1843 	.ras_block = {
1844 		.hw_ops = &gfx_v9_0_ras_ops,
1845 	},
1846 };
1847 
1848 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1849 {
1850 	u32 gb_addr_config;
1851 	int err;
1852 
1853 	switch (adev->ip_versions[GC_HWIP][0]) {
1854 	case IP_VERSION(9, 0, 1):
1855 		adev->gfx.config.max_hw_contexts = 8;
1856 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1857 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1858 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1859 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1860 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1861 		break;
1862 	case IP_VERSION(9, 2, 1):
1863 		adev->gfx.config.max_hw_contexts = 8;
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1868 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1869 		DRM_INFO("fix gfx.config for vega12\n");
1870 		break;
1871 	case IP_VERSION(9, 4, 0):
1872 		adev->gfx.ras = &gfx_v9_0_ras;
1873 		adev->gfx.config.max_hw_contexts = 8;
1874 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1878 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1879 		gb_addr_config &= ~0xf3e777ff;
1880 		gb_addr_config |= 0x22014042;
1881 		/* check vbios table if gpu info is not available */
1882 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1883 		if (err)
1884 			return err;
1885 		break;
1886 	case IP_VERSION(9, 2, 2):
1887 	case IP_VERSION(9, 1, 0):
1888 		adev->gfx.config.max_hw_contexts = 8;
1889 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1894 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1895 		else
1896 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1897 		break;
1898 	case IP_VERSION(9, 4, 1):
1899 		adev->gfx.ras = &gfx_v9_4_ras;
1900 		adev->gfx.config.max_hw_contexts = 8;
1901 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1905 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1906 		gb_addr_config &= ~0xf3e777ff;
1907 		gb_addr_config |= 0x22014042;
1908 		break;
1909 	case IP_VERSION(9, 3, 0):
1910 		adev->gfx.config.max_hw_contexts = 8;
1911 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1912 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1913 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1914 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1915 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1916 		gb_addr_config &= ~0xf3e777ff;
1917 		gb_addr_config |= 0x22010042;
1918 		break;
1919 	case IP_VERSION(9, 4, 2):
1920 		adev->gfx.ras = &gfx_v9_4_2_ras;
1921 		adev->gfx.config.max_hw_contexts = 8;
1922 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1923 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1924 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1925 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1926 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1927 		gb_addr_config &= ~0xf3e777ff;
1928 		gb_addr_config |= 0x22014042;
1929 		/* check vbios table if gpu info is not available */
1930 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1931 		if (err)
1932 			return err;
1933 		break;
1934 	default:
1935 		BUG();
1936 		break;
1937 	}
1938 
1939 	adev->gfx.config.gb_addr_config = gb_addr_config;
1940 
1941 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1942 			REG_GET_FIELD(
1943 					adev->gfx.config.gb_addr_config,
1944 					GB_ADDR_CONFIG,
1945 					NUM_PIPES);
1946 
1947 	adev->gfx.config.max_tile_pipes =
1948 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1949 
1950 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1951 			REG_GET_FIELD(
1952 					adev->gfx.config.gb_addr_config,
1953 					GB_ADDR_CONFIG,
1954 					NUM_BANKS);
1955 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1956 			REG_GET_FIELD(
1957 					adev->gfx.config.gb_addr_config,
1958 					GB_ADDR_CONFIG,
1959 					MAX_COMPRESSED_FRAGS);
1960 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1961 			REG_GET_FIELD(
1962 					adev->gfx.config.gb_addr_config,
1963 					GB_ADDR_CONFIG,
1964 					NUM_RB_PER_SE);
1965 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1966 			REG_GET_FIELD(
1967 					adev->gfx.config.gb_addr_config,
1968 					GB_ADDR_CONFIG,
1969 					NUM_SHADER_ENGINES);
1970 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1971 			REG_GET_FIELD(
1972 					adev->gfx.config.gb_addr_config,
1973 					GB_ADDR_CONFIG,
1974 					PIPE_INTERLEAVE_SIZE));
1975 
1976 	return 0;
1977 }
1978 
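/*
 * Fill in one compute ring: map the MEC/pipe/queue triple to a doorbell and
 * HPD EOP slot, derive the EOP interrupt source and hardware priority, then
 * hand the ring to the common ring init code.
 */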
1979 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1980 				      int mec, int pipe, int queue)
1981 {
1982 	unsigned irq_type;
1983 	struct amdgpu_ring *ring;
1984 	unsigned int hw_prio;
1985 
1986 	ring = &adev->gfx.compute_ring[ring_id];
1987 
1988 	/* mec0 is me1 */
1989 	ring->me = mec + 1;
1990 	ring->pipe = pipe;
1991 	ring->queue = queue;
1992 
1993 	ring->ring_obj = NULL;
1994 	ring->use_doorbell = true;
1995 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1996 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1997 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1998 	ring->vm_hub = AMDGPU_GFXHUB_0;
1999 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2000 
2001 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2002 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2003 		+ ring->pipe;
2004 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2005 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2006 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2007 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2008 				hw_prio, NULL);
2009 }
2010 
2011 static int gfx_v9_0_sw_init(void *handle)
2012 {
2013 	int i, j, k, r, ring_id;
2014 	struct amdgpu_ring *ring;
2015 	struct amdgpu_kiq *kiq;
2016 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2017 	unsigned int hw_prio;
2018 
2019 	switch (adev->ip_versions[GC_HWIP][0]) {
2020 	case IP_VERSION(9, 0, 1):
2021 	case IP_VERSION(9, 2, 1):
2022 	case IP_VERSION(9, 4, 0):
2023 	case IP_VERSION(9, 2, 2):
2024 	case IP_VERSION(9, 1, 0):
2025 	case IP_VERSION(9, 4, 1):
2026 	case IP_VERSION(9, 3, 0):
2027 	case IP_VERSION(9, 4, 2):
2028 		adev->gfx.mec.num_mec = 2;
2029 		break;
2030 	default:
2031 		adev->gfx.mec.num_mec = 1;
2032 		break;
2033 	}
2034 
2035 	adev->gfx.mec.num_pipe_per_mec = 4;
2036 	adev->gfx.mec.num_queue_per_pipe = 8;
2037 
2038 	/* EOP Event */
2039 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2040 	if (r)
2041 		return r;
2042 
2043 	/* Privileged reg */
2044 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2045 			      &adev->gfx.priv_reg_irq);
2046 	if (r)
2047 		return r;
2048 
2049 	/* Privileged inst */
2050 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2051 			      &adev->gfx.priv_inst_irq);
2052 	if (r)
2053 		return r;
2054 
2055 	/* ECC error */
2056 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2057 			      &adev->gfx.cp_ecc_error_irq);
2058 	if (r)
2059 		return r;
2060 
2061 	/* FUE error */
2062 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2063 			      &adev->gfx.cp_ecc_error_irq);
2064 	if (r)
2065 		return r;
2066 
2067 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2068 
2069 	if (adev->gfx.rlc.funcs) {
2070 		if (adev->gfx.rlc.funcs->init) {
2071 			r = adev->gfx.rlc.funcs->init(adev);
2072 			if (r) {
2073 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2074 				return r;
2075 			}
2076 		}
2077 	}
2078 
2079 	r = gfx_v9_0_mec_init(adev);
2080 	if (r) {
2081 		DRM_ERROR("Failed to init MEC BOs!\n");
2082 		return r;
2083 	}
2084 
2085 	/* set up the gfx ring */
2086 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2087 		ring = &adev->gfx.gfx_ring[i];
2088 		ring->ring_obj = NULL;
2089 		if (!i)
2090 			sprintf(ring->name, "gfx");
2091 		else
2092 			sprintf(ring->name, "gfx_%d", i);
2093 		ring->use_doorbell = true;
2094 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2095 
2096 		/* disable scheduler on the real ring */
2097 		ring->no_scheduler = true;
2098 		ring->vm_hub = AMDGPU_GFXHUB_0;
2099 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2100 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2101 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2102 		if (r)
2103 			return r;
2104 	}
2105 
2106 	/* set up the software rings */
2107 	if (adev->gfx.num_gfx_rings) {
2108 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2109 			ring = &adev->gfx.sw_gfx_ring[i];
2110 			ring->ring_obj = NULL;
2111 			sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2112 			ring->use_doorbell = true;
2113 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2114 			ring->is_sw_ring = true;
2115 			hw_prio = amdgpu_sw_ring_priority(i);
2116 			ring->vm_hub = AMDGPU_GFXHUB_0;
2117 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2118 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2119 					     NULL);
2120 			if (r)
2121 				return r;
2122 			ring->wptr = 0;
2123 		}
2124 
2125 		/* init the muxer and add software rings */
2126 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2127 					 GFX9_NUM_SW_GFX_RINGS);
2128 		if (r) {
2129 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2130 			return r;
2131 		}
2132 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2133 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2134 							&adev->gfx.sw_gfx_ring[i]);
2135 			if (r) {
2136 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2137 				return r;
2138 			}
2139 		}
2140 	}
2141 
2142 	/* set up the compute queues - allocate horizontally across pipes */
2143 	ring_id = 0;
2144 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2145 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2146 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2147 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2148 					continue;
2149 
2150 				r = gfx_v9_0_compute_ring_init(adev,
2151 							       ring_id,
2152 							       i, k, j);
2153 				if (r)
2154 					return r;
2155 
2156 				ring_id++;
2157 			}
2158 		}
2159 	}
2160 
2161 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2162 	if (r) {
2163 		DRM_ERROR("Failed to init KIQ BOs!\n");
2164 		return r;
2165 	}
2166 
2167 	kiq = &adev->gfx.kiq;
2168 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2169 	if (r)
2170 		return r;
2171 
2172 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2173 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2174 	if (r)
2175 		return r;
2176 
2177 	adev->gfx.ce_ram_size = 0x8000;
2178 
2179 	r = gfx_v9_0_gpu_early_init(adev);
2180 	if (r)
2181 		return r;
2182 
2183 	if (amdgpu_gfx_ras_sw_init(adev)) {
2184 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2185 		return -EINVAL;
2186 	}
2187 
2188 	return 0;
2189 }
2190 
2191 
2192 static int gfx_v9_0_sw_fini(void *handle)
2193 {
2194 	int i;
2195 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2196 
2197 	if (adev->gfx.num_gfx_rings) {
2198 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2199 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2200 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2201 	}
2202 
2203 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2204 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2205 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2206 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2207 
2208 	amdgpu_gfx_mqd_sw_fini(adev);
2209 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2210 	amdgpu_gfx_kiq_fini(adev);
2211 
2212 	gfx_v9_0_mec_fini(adev);
2213 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2214 				&adev->gfx.rlc.clear_state_gpu_addr,
2215 				(void **)&adev->gfx.rlc.cs_ptr);
2216 	if (adev->flags & AMD_IS_APU) {
2217 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2218 				&adev->gfx.rlc.cp_table_gpu_addr,
2219 				(void **)&adev->gfx.rlc.cp_table_ptr);
2220 	}
2221 	gfx_v9_0_free_microcode(adev);
2222 
2223 	return 0;
2224 }
2225 
2226 
2227 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2228 {
2229 	/* TODO */
2230 }
2231 
2232 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2233 			   u32 instance)
2234 {
2235 	u32 data;
2236 
2237 	if (instance == 0xffffffff)
2238 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2239 	else
2240 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2241 
2242 	if (se_num == 0xffffffff)
2243 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2244 	else
2245 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2246 
2247 	if (sh_num == 0xffffffff)
2248 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2249 	else
2250 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2251 
2252 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2253 }
2254 
2255 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2256 {
2257 	u32 data, mask;
2258 
2259 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2260 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2261 
2262 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2263 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2264 
2265 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2266 					 adev->gfx.config.max_sh_per_se);
2267 
2268 	return (~data) & mask;
2269 }
2270 
2271 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2272 {
2273 	int i, j;
2274 	u32 data;
2275 	u32 active_rbs = 0;
2276 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2277 					adev->gfx.config.max_sh_per_se;
2278 
2279 	mutex_lock(&adev->grbm_idx_mutex);
2280 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2281 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2282 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2283 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2284 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2285 					       rb_bitmap_width_per_sh);
2286 		}
2287 	}
2288 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2289 	mutex_unlock(&adev->grbm_idx_mutex);
2290 
2291 	adev->gfx.config.backend_enable_mask = active_rbs;
2292 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2293 }
2294 
2295 #define DEFAULT_SH_MEM_BASES	(0x6000)
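/*
 * Give every KFD-owned compute VMID the fixed aperture layout described in
 * the function body and clear its GDS/GWS/OA allocation; firmware re-enables
 * those for the VMIDs that need them.
 */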
2296 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2297 {
2298 	int i;
2299 	uint32_t sh_mem_config;
2300 	uint32_t sh_mem_bases;
2301 
2302 	/*
2303 	 * Configure apertures:
2304 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2305 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2306 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2307 	 */
2308 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2309 
2310 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2311 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2312 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2313 
2314 	mutex_lock(&adev->srbm_mutex);
2315 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2316 		soc15_grbm_select(adev, 0, 0, 0, i);
2317 		/* CP and shaders */
2318 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2319 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2320 	}
2321 	soc15_grbm_select(adev, 0, 0, 0, 0);
2322 	mutex_unlock(&adev->srbm_mutex);
2323 
2324 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2325 	   access. These should be enabled by FW for target VMIDs. */
2326 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2327 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2328 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2329 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2330 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2331 	}
2332 }
2333 
2334 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2335 {
2336 	int vmid;
2337 
2338 	/*
2339 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2340 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2341 	 * the driver can enable them for graphics. VMID0 should maintain
2342 	 * access so that HWS firmware can save/restore entries.
2343 	 */
2344 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2345 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2346 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2347 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2348 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2349 	}
2350 }
2351 
2352 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2353 {
2354 	uint32_t tmp;
2355 
2356 	switch (adev->ip_versions[GC_HWIP][0]) {
2357 	case IP_VERSION(9, 4, 1):
2358 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2359 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2360 					DISABLE_BARRIER_WAITCNT, 1);
2361 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2362 		break;
2363 	default:
2364 		break;
2365 	}
2366 }
2367 
2368 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2369 {
2370 	u32 tmp;
2371 	int i;
2372 
2373 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2374 
2375 	gfx_v9_0_tiling_mode_table_init(adev);
2376 
2377 	if (adev->gfx.num_gfx_rings)
2378 		gfx_v9_0_setup_rb(adev);
2379 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2380 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2381 
2382 	/* XXX SH_MEM regs */
2383 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2384 	mutex_lock(&adev->srbm_mutex);
2385 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2386 		soc15_grbm_select(adev, 0, 0, 0, i);
2387 		/* CP and shaders */
2388 		if (i == 0) {
2389 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2390 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2391 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2392 					    !!adev->gmc.noretry);
2393 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2394 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2395 		} else {
2396 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2397 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2398 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2399 					    !!adev->gmc.noretry);
2400 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2401 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2402 				(adev->gmc.private_aperture_start >> 48));
2403 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2404 				(adev->gmc.shared_aperture_start >> 48));
2405 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2406 		}
2407 	}
2408 	soc15_grbm_select(adev, 0, 0, 0, 0);
2409 
2410 	mutex_unlock(&adev->srbm_mutex);
2411 
2412 	gfx_v9_0_init_compute_vmid(adev);
2413 	gfx_v9_0_init_gds_vmid(adev);
2414 	gfx_v9_0_init_sq_config(adev);
2415 }
2416 
2417 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2418 {
2419 	u32 i, j, k;
2420 	u32 mask;
2421 
2422 	mutex_lock(&adev->grbm_idx_mutex);
2423 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2424 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2425 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2426 			for (k = 0; k < adev->usec_timeout; k++) {
2427 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2428 					break;
2429 				udelay(1);
2430 			}
2431 			if (k == adev->usec_timeout) {
2432 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2433 						      0xffffffff, 0xffffffff);
2434 				mutex_unlock(&adev->grbm_idx_mutex);
2435 				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2436 					 i, j);
2437 				return;
2438 			}
2439 		}
2440 	}
2441 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2442 	mutex_unlock(&adev->grbm_idx_mutex);
2443 
2444 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2445 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2446 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2447 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2448 	for (k = 0; k < adev->usec_timeout; k++) {
2449 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2450 			break;
2451 		udelay(1);
2452 	}
2453 }
2454 
2455 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2456 					       bool enable)
2457 {
2458 	u32 tmp;
2459 
2460 	/* These interrupts should be enabled to drive DS clock */
2461 
2462 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2463 
2464 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2465 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2466 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2467 	if (adev->gfx.num_gfx_rings)
2468 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2469 
2470 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2471 }
2472 
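/*
 * Regenerate the clear-state buffer contents and point the RLC at it by
 * programming its GPU address and size into the RLC CSIB registers.
 */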
2473 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2474 {
2475 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2476 	/* csib */
2477 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2478 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2479 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2480 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2481 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2482 			adev->gfx.rlc.clear_state_size);
2483 }
2484 
2485 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2486 				int indirect_offset,
2487 				int list_size,
2488 				int *unique_indirect_regs,
2489 				int unique_indirect_reg_count,
2490 				int *indirect_start_offsets,
2491 				int *indirect_start_offsets_count,
2492 				int max_start_offsets_count)
2493 {
2494 	int idx;
2495 
2496 	for (; indirect_offset < list_size; indirect_offset++) {
2497 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2498 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2499 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2500 
2501 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2502 			indirect_offset += 2;
2503 
2504 			/* look for the matching index */
2505 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2506 				if (unique_indirect_regs[idx] ==
2507 					register_list_format[indirect_offset] ||
2508 					!unique_indirect_regs[idx])
2509 					break;
2510 			}
2511 
2512 			BUG_ON(idx >= unique_indirect_reg_count);
2513 
2514 			if (!unique_indirect_regs[idx])
2515 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2516 
2517 			indirect_offset++;
2518 		}
2519 	}
2520 }
2521 
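/*
 * Build the RLC save/restore list from the register_list_format firmware
 * blob: write the register restore table to SRM ARAM, load the direct and
 * parsed indirect portions into RLC scratch RAM, and program the unique
 * indirect register index/data pairs.
 */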
2522 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2523 {
2524 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2525 	int unique_indirect_reg_count = 0;
2526 
2527 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2528 	int indirect_start_offsets_count = 0;
2529 
2530 	int list_size = 0;
2531 	int i = 0, j = 0;
2532 	u32 tmp = 0;
2533 
2534 	u32 *register_list_format =
2535 		kmemdup(adev->gfx.rlc.register_list_format,
2536 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2537 	if (!register_list_format)
2538 		return -ENOMEM;
2539 
2540 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2541 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2542 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2543 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2544 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2545 				    unique_indirect_regs,
2546 				    unique_indirect_reg_count,
2547 				    indirect_start_offsets,
2548 				    &indirect_start_offsets_count,
2549 				    ARRAY_SIZE(indirect_start_offsets));
2550 
2551 	/* enable auto inc in case it is disabled */
2552 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2553 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2554 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2555 
2556 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2557 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2558 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2559 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2560 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2561 			adev->gfx.rlc.register_restore[i]);
2562 
2563 	/* load indirect register */
2564 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2565 		adev->gfx.rlc.reg_list_format_start);
2566 
2567 	/* direct register portion */
2568 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2569 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2570 			register_list_format[i]);
2571 
2572 	/* indirect register portion */
2573 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2574 		if (register_list_format[i] == 0xFFFFFFFF) {
2575 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2576 			continue;
2577 		}
2578 
2579 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2580 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2581 
2582 		for (j = 0; j < unique_indirect_reg_count; j++) {
2583 			if (register_list_format[i] == unique_indirect_regs[j]) {
2584 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2585 				break;
2586 			}
2587 		}
2588 
2589 		BUG_ON(j >= unique_indirect_reg_count);
2590 
2591 		i++;
2592 	}
2593 
2594 	/* set save/restore list size */
2595 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2596 	list_size = list_size >> 1;
2597 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2598 		adev->gfx.rlc.reg_restore_list_size);
2599 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2600 
2601 	/* write the starting offsets to RLC scratch ram */
2602 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2603 		adev->gfx.rlc.starting_offsets_start);
2604 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2605 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2606 		       indirect_start_offsets[i]);
2607 
2608 	/* load unique indirect regs */
2609 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2610 		if (unique_indirect_regs[i] != 0) {
2611 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2612 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2613 			       unique_indirect_regs[i] & 0x3FFFF);
2614 
2615 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2616 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2617 			       unique_indirect_regs[i] >> 20);
2618 		}
2619 	}
2620 
2621 	kfree(register_list_format);
2622 	return 0;
2623 }
2624 
2625 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2626 {
2627 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2628 }
2629 
2630 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2631 					     bool enable)
2632 {
2633 	uint32_t data = 0;
2634 	uint32_t default_data = 0;
2635 
2636 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2637 	if (enable) {
2638 		/* enable GFXIP control over CGPG */
2639 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2640 		if (default_data != data)
2641 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2642 
2643 		/* update status */
2644 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2645 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2646 		if (default_data != data)
2647 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2648 	} else {
2649 		/* restore GFXIP control over CGPG */
2650 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2651 		if (default_data != data)
2652 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2653 	}
2654 }
2655 
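/*
 * When any GFX power-gating feature is enabled, program the CP idle poll
 * count, the RLC power-gating delays and the auto-PG idle threshold, and
 * (except on GC 9.3.0) hand CGPG control to the GFX IP.
 */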
2656 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2657 {
2658 	uint32_t data = 0;
2659 
2660 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2661 			      AMD_PG_SUPPORT_GFX_SMG |
2662 			      AMD_PG_SUPPORT_GFX_DMG)) {
2663 		/* init IDLE_POLL_COUNT = 60 */
2664 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2665 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2666 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2667 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2668 
2669 		/* init RLC PG Delay */
2670 		data = 0;
2671 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2672 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2673 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2674 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2675 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2676 
2677 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2678 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2679 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2680 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2681 
2682 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2683 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2684 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2685 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2686 
2687 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2688 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2689 
2690 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2691 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2692 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2693 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2694 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2695 	}
2696 }
2697 
2698 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2699 						bool enable)
2700 {
2701 	uint32_t data = 0;
2702 	uint32_t default_data = 0;
2703 
2704 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2705 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2706 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2707 			     enable ? 1 : 0);
2708 	if (default_data != data)
2709 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2710 }
2711 
2712 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2713 						bool enable)
2714 {
2715 	uint32_t data = 0;
2716 	uint32_t default_data = 0;
2717 
2718 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2719 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2720 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2721 			     enable ? 1 : 0);
2722 	if (default_data != data)
2723 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2724 }
2725 
2726 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2727 					bool enable)
2728 {
2729 	uint32_t data = 0;
2730 	uint32_t default_data = 0;
2731 
2732 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2733 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2734 			     CP_PG_DISABLE,
2735 			     enable ? 0 : 1);
2736 	if (default_data != data)
2737 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2738 }
2739 
2740 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2741 						bool enable)
2742 {
2743 	uint32_t data, default_data;
2744 
2745 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2746 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2747 			     GFX_POWER_GATING_ENABLE,
2748 			     enable ? 1 : 0);
2749 	if (default_data != data)
2750 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2751 }
2752 
2753 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2754 						bool enable)
2755 {
2756 	uint32_t data, default_data;
2757 
2758 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2759 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2760 			     GFX_PIPELINE_PG_ENABLE,
2761 			     enable ? 1 : 0);
2762 	if (default_data != data)
2763 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2764 
2765 	if (!enable)
2766 		/* read any GFX register to wake up GFX */
2767 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2768 }
2769 
2770 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2771 						       bool enable)
2772 {
2773 	uint32_t data, default_data;
2774 
2775 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2776 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2777 			     STATIC_PER_CU_PG_ENABLE,
2778 			     enable ? 1 : 0);
2779 	if (default_data != data)
2780 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2781 }
2782 
2783 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2784 						bool enable)
2785 {
2786 	uint32_t data, default_data;
2787 
2788 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2789 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2790 			     DYN_PER_CU_PG_ENABLE,
2791 			     enable ? 1 : 0);
2792 	if (default_data != data)
2793 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2794 }
2795 
2796 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2797 {
2798 	gfx_v9_0_init_csb(adev);
2799 
2800 	/*
2801 	 * Rlc save restore list is workable since v2_1.
2802 	 * And it's needed by gfxoff feature.
2803 	 */
2804 	if (adev->gfx.rlc.is_rlc_v2_1) {
2805 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2806 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2807 			gfx_v9_1_init_rlc_save_restore_list(adev);
2808 		gfx_v9_0_enable_save_restore_machine(adev);
2809 	}
2810 
2811 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2812 			      AMD_PG_SUPPORT_GFX_SMG |
2813 			      AMD_PG_SUPPORT_GFX_DMG |
2814 			      AMD_PG_SUPPORT_CP |
2815 			      AMD_PG_SUPPORT_GDS |
2816 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2817 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2818 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2819 		gfx_v9_0_init_gfx_power_gating(adev);
2820 	}
2821 }
2822 
2823 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2824 {
2825 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2826 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2827 	gfx_v9_0_wait_for_rlc_serdes(adev);
2828 }
2829 
2830 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2831 {
2832 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2833 	udelay(50);
2834 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2835 	udelay(50);
2836 }
2837 
2838 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2839 {
2840 #ifdef AMDGPU_RLC_DEBUG_RETRY
2841 	u32 rlc_ucode_ver;
2842 #endif
2843 
2844 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2845 	udelay(50);
2846 
2847 	/* on APUs (e.g. carrizo) the CP interrupt is enabled after the CP is initialized */
2848 	if (!(adev->flags & AMD_IS_APU)) {
2849 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2850 		udelay(50);
2851 	}
2852 
2853 #ifdef AMDGPU_RLC_DEBUG_RETRY
2854 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2855 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2856 	if (rlc_ucode_ver == 0x108) {
2857 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2858 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2859 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2860 		 * default is 0x9C4 to create a 100us interval */
2861 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2862 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2863 		 * to disable the page fault retry interrupts, default is
2864 		 * 0x100 (256) */
2865 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2866 	}
2867 #endif
2868 }
2869 
2870 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2871 {
2872 	const struct rlc_firmware_header_v2_0 *hdr;
2873 	const __le32 *fw_data;
2874 	unsigned i, fw_size;
2875 
2876 	if (!adev->gfx.rlc_fw)
2877 		return -EINVAL;
2878 
2879 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2880 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2881 
2882 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2883 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2884 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2885 
2886 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2887 			RLCG_UCODE_LOADING_START_ADDRESS);
2888 	for (i = 0; i < fw_size; i++)
2889 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2890 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2891 
2892 	return 0;
2893 }
2894 
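/*
 * Illustrative sketch, not part of the original driver: the RLC loader above
 * and the CP PFP/CE/ME loaders below stream their firmware the same way --
 * point the ADDR register at the load offset, push the image one dword at a
 * time through the DATA register, then write the firmware version back to the
 * ADDR register.  A hypothetical generic form of that loop (the helper name
 * and parameters are assumptions made for illustration) would be:
 */
static void __maybe_unused gfx_v9_0_example_stream_ucode(struct amdgpu_device *adev,
							 uint32_t addr_reg,
							 uint32_t data_reg,
							 uint32_t start_addr,
							 const __le32 *fw_data,
							 unsigned int fw_size_dw,
							 uint32_t fw_version)
{
	unsigned int i;

	WREG32(addr_reg, start_addr);
	for (i = 0; i < fw_size_dw; i++)
		WREG32(data_reg, le32_to_cpup(fw_data++));
	WREG32(addr_reg, fw_version);
}
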
2895 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2896 {
2897 	int r;
2898 
2899 	if (amdgpu_sriov_vf(adev)) {
2900 		gfx_v9_0_init_csb(adev);
2901 		return 0;
2902 	}
2903 
2904 	adev->gfx.rlc.funcs->stop(adev);
2905 
2906 	/* disable CG */
2907 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2908 
2909 	gfx_v9_0_init_pg(adev);
2910 
2911 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2912 		/* legacy rlc firmware loading */
2913 		r = gfx_v9_0_rlc_load_microcode(adev);
2914 		if (r)
2915 			return r;
2916 	}
2917 
2918 	switch (adev->ip_versions[GC_HWIP][0]) {
2919 	case IP_VERSION(9, 2, 2):
2920 	case IP_VERSION(9, 1, 0):
2921 		if (amdgpu_lbpw == 0)
2922 			gfx_v9_0_enable_lbpw(adev, false);
2923 		else
2924 			gfx_v9_0_enable_lbpw(adev, true);
2925 		break;
2926 	case IP_VERSION(9, 4, 0):
2927 		if (amdgpu_lbpw > 0)
2928 			gfx_v9_0_enable_lbpw(adev, true);
2929 		else
2930 			gfx_v9_0_enable_lbpw(adev, false);
2931 		break;
2932 	default:
2933 		break;
2934 	}
2935 
2936 	adev->gfx.rlc.funcs->start(adev);
2937 
2938 	return 0;
2939 }
2940 
2941 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2942 {
2943 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2944 
2945 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2946 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2947 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2948 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2949 	udelay(50);
2950 }
2951 
2952 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2953 {
2954 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2955 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2956 	const struct gfx_firmware_header_v1_0 *me_hdr;
2957 	const __le32 *fw_data;
2958 	unsigned i, fw_size;
2959 
2960 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2961 		return -EINVAL;
2962 
2963 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2964 		adev->gfx.pfp_fw->data;
2965 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2966 		adev->gfx.ce_fw->data;
2967 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2968 		adev->gfx.me_fw->data;
2969 
2970 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2971 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2972 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2973 
2974 	gfx_v9_0_cp_gfx_enable(adev, false);
2975 
2976 	/* PFP */
2977 	fw_data = (const __le32 *)
2978 		(adev->gfx.pfp_fw->data +
2979 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2980 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2981 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2982 	for (i = 0; i < fw_size; i++)
2983 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2984 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2985 
2986 	/* CE */
2987 	fw_data = (const __le32 *)
2988 		(adev->gfx.ce_fw->data +
2989 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2990 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2991 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2992 	for (i = 0; i < fw_size; i++)
2993 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2994 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2995 
2996 	/* ME */
2997 	fw_data = (const __le32 *)
2998 		(adev->gfx.me_fw->data +
2999 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3000 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3001 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3002 	for (i = 0; i < fw_size; i++)
3003 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3004 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3005 
3006 	return 0;
3007 }
3008 
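/*
 * Editorial note: gfx_v9_0_cp_gfx_start() below primes the gfx ring with the
 * PM4 clear-state sequence -- PREAMBLE_CNTL (begin clear state),
 * CONTEXT_CONTROL, the SECT_CONTEXT register extents from gfx9_cs_data,
 * PREAMBLE_CNTL (end clear state), CLEAR_STATE, a SET_BASE for the CE
 * partition and a SET_UCONFIG_REG for VGT_INDEX_TYPE -- before committing
 * the ring.
 */
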
3009 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3010 {
3011 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3012 	const struct cs_section_def *sect = NULL;
3013 	const struct cs_extent_def *ext = NULL;
3014 	int r, i, tmp;
3015 
3016 	/* init the CP */
3017 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3018 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3019 
3020 	gfx_v9_0_cp_gfx_enable(adev, true);
3021 
3022 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3023 	if (r) {
3024 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3025 		return r;
3026 	}
3027 
3028 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3029 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3030 
3031 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3032 	amdgpu_ring_write(ring, 0x80000000);
3033 	amdgpu_ring_write(ring, 0x80000000);
3034 
3035 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3036 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3037 			if (sect->id == SECT_CONTEXT) {
3038 				amdgpu_ring_write(ring,
3039 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3040 					       ext->reg_count));
3041 				amdgpu_ring_write(ring,
3042 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3043 				for (i = 0; i < ext->reg_count; i++)
3044 					amdgpu_ring_write(ring, ext->extent[i]);
3045 			}
3046 		}
3047 	}
3048 
3049 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3050 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3051 
3052 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3053 	amdgpu_ring_write(ring, 0);
3054 
3055 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3056 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3057 	amdgpu_ring_write(ring, 0x8000);
3058 	amdgpu_ring_write(ring, 0x8000);
3059 
3060 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3061 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3062 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3063 	amdgpu_ring_write(ring, tmp);
3064 	amdgpu_ring_write(ring, 0);
3065 
3066 	amdgpu_ring_commit(ring);
3067 
3068 	return 0;
3069 }
3070 
3071 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3072 {
3073 	struct amdgpu_ring *ring;
3074 	u32 tmp;
3075 	u32 rb_bufsz;
3076 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3077 
3078 	/* Set the write pointer delay */
3079 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3080 
3081 	/* set the RB to use vmid 0 */
3082 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3083 
3084 	/* Set ring buffer size */
3085 	ring = &adev->gfx.gfx_ring[0];
3086 	rb_bufsz = order_base_2(ring->ring_size / 8);
3087 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3088 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
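	/* e.g. a 64 KiB ring gives ring_size / 8 = 8192 and order_base_2(8192) = 13,
	 * so RB_BUFSZ = 13 and RB_BLKSZ = 11 (example values; any power-of-two
	 * ring size follows the same arithmetic)
	 */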
3089 #ifdef __BIG_ENDIAN
3090 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3091 #endif
3092 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3093 
3094 	/* Initialize the ring buffer's write pointers */
3095 	ring->wptr = 0;
3096 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3097 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3098 
3099 	/* set the wb address whether it's enabled or not */
3100 	rptr_addr = ring->rptr_gpu_addr;
3101 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3102 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3103 
3104 	wptr_gpu_addr = ring->wptr_gpu_addr;
3105 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3106 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3107 
3108 	mdelay(1);
3109 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3110 
3111 	rb_addr = ring->gpu_addr >> 8;
3112 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3113 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3114 
3115 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3116 	if (ring->use_doorbell) {
3117 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3118 				    DOORBELL_OFFSET, ring->doorbell_index);
3119 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3120 				    DOORBELL_EN, 1);
3121 	} else {
3122 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3123 	}
3124 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3125 
3126 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3127 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3128 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3129 
3130 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3131 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3132 
3133 
3134 	/* start the ring */
3135 	gfx_v9_0_cp_gfx_start(adev);
3136 	ring->sched.ready = true;
3137 
3138 	return 0;
3139 }
3140 
3141 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3142 {
3143 	if (enable) {
3144 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3145 	} else {
3146 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3147 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3148 		adev->gfx.kiq.ring.sched.ready = false;
3149 	}
3150 	udelay(50);
3151 }
3152 
3153 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3154 {
3155 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3156 	const __le32 *fw_data;
3157 	unsigned i;
3158 	u32 tmp;
3159 
3160 	if (!adev->gfx.mec_fw)
3161 		return -EINVAL;
3162 
3163 	gfx_v9_0_cp_compute_enable(adev, false);
3164 
3165 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3166 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3167 
3168 	fw_data = (const __le32 *)
3169 		(adev->gfx.mec_fw->data +
3170 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3171 	tmp = 0;
3172 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3173 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3174 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3175 
3176 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3177 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3178 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3179 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3180 
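	/* Note: CP_CPC_IC_BASE above points the CPC instruction cache at the MEC
	 * firmware BO, so only the jump table entries are streamed through the
	 * UCODE_ADDR/DATA port below rather than the whole image.
	 */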
3181 	/* MEC1 */
3182 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3183 			 mec_hdr->jt_offset);
3184 	for (i = 0; i < mec_hdr->jt_size; i++)
3185 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3186 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3187 
3188 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3189 			adev->gfx.mec_fw_version);
3190 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3191 
3192 	return 0;
3193 }
3194 
3195 /* KIQ functions */
3196 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3197 {
3198 	uint32_t tmp;
3199 	struct amdgpu_device *adev = ring->adev;
3200 
3201 	/* tell the RLC which queue is the KIQ queue */
3202 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3203 	tmp &= 0xffffff00;
3204 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3205 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3206 	tmp |= 0x80;
3207 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3208 }
3209 
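/*
 * Editorial note on the RLC_CP_SCHEDULERS value written above: the low byte
 * packs the KIQ location as (me << 5) | (pipe << 3) | queue, and the second
 * write additionally sets bit 7 (0x80).  For example (hypothetical values),
 * me = 1, pipe = 0, queue = 0 gives 0x20 on the first write and 0xa0 on the
 * second.
 */
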
3210 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3211 {
3212 	struct amdgpu_device *adev = ring->adev;
3213 
3214 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3215 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3216 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3217 			mqd->cp_hqd_queue_priority =
3218 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3219 		}
3220 	}
3221 }
3222 
3223 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3224 {
3225 	struct amdgpu_device *adev = ring->adev;
3226 	struct v9_mqd *mqd = ring->mqd_ptr;
3227 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3228 	uint32_t tmp;
3229 
3230 	mqd->header = 0xC0310800;
3231 	mqd->compute_pipelinestat_enable = 0x00000001;
3232 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3233 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3234 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3235 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3236 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3237 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3238 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3239 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3240 	mqd->compute_misc_reserved = 0x00000003;
3241 
3242 	mqd->dynamic_cu_mask_addr_lo =
3243 		lower_32_bits(ring->mqd_gpu_addr
3244 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3245 	mqd->dynamic_cu_mask_addr_hi =
3246 		upper_32_bits(ring->mqd_gpu_addr
3247 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3248 
3249 	eop_base_addr = ring->eop_gpu_addr >> 8;
3250 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3251 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3252 
3253 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3254 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3255 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3256 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3257 
3258 	mqd->cp_hqd_eop_control = tmp;
3259 
3260 	/* enable doorbell? */
3261 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3262 
3263 	if (ring->use_doorbell) {
3264 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3265 				    DOORBELL_OFFSET, ring->doorbell_index);
3266 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3267 				    DOORBELL_EN, 1);
3268 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3269 				    DOORBELL_SOURCE, 0);
3270 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3271 				    DOORBELL_HIT, 0);
3272 	} else {
3273 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3274 					 DOORBELL_EN, 0);
3275 	}
3276 
3277 	mqd->cp_hqd_pq_doorbell_control = tmp;
3278 
3279 	/* disable the queue if it's active */
3280 	ring->wptr = 0;
3281 	mqd->cp_hqd_dequeue_request = 0;
3282 	mqd->cp_hqd_pq_rptr = 0;
3283 	mqd->cp_hqd_pq_wptr_lo = 0;
3284 	mqd->cp_hqd_pq_wptr_hi = 0;
3285 
3286 	/* set the pointer to the MQD */
3287 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3288 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3289 
3290 	/* set MQD vmid to 0 */
3291 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3292 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3293 	mqd->cp_mqd_control = tmp;
3294 
3295 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3296 	hqd_gpu_addr = ring->gpu_addr >> 8;
3297 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3298 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3299 
3300 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3301 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3302 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3303 			    (order_base_2(ring->ring_size / 4) - 1));
3304 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3305 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
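	/* with a 4 KiB GPU page (the usual AMDGPU_GPU_PAGE_SIZE) this is
	 * order_base_2(1024) - 1 = 9
	 */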
3306 #ifdef __BIG_ENDIAN
3307 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3308 #endif
3309 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3310 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3311 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3312 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3313 	mqd->cp_hqd_pq_control = tmp;
3314 
3315 	/* set the wb address whether it's enabled or not */
3316 	wb_gpu_addr = ring->rptr_gpu_addr;
3317 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3318 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3319 		upper_32_bits(wb_gpu_addr) & 0xffff;
3320 
3321 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3322 	wb_gpu_addr = ring->wptr_gpu_addr;
3323 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3324 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3325 
3326 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3327 	ring->wptr = 0;
3328 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3329 
3330 	/* set the vmid for the queue */
3331 	mqd->cp_hqd_vmid = 0;
3332 
3333 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3334 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3335 	mqd->cp_hqd_persistent_state = tmp;
3336 
3337 	/* set MIN_IB_AVAIL_SIZE */
3338 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3339 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3340 	mqd->cp_hqd_ib_control = tmp;
3341 
3342 	/* set static priority for a queue/ring */
3343 	gfx_v9_0_mqd_set_priority(ring, mqd);
3344 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3345 
3346 	/* the map_queues packet doesn't need to activate the queue,
3347 	 * so only the KIQ needs to set this field.
3348 	 */
3349 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3350 		mqd->cp_hqd_active = 1;
3351 
3352 	return 0;
3353 }
3354 
3355 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3356 {
3357 	struct amdgpu_device *adev = ring->adev;
3358 	struct v9_mqd *mqd = ring->mqd_ptr;
3359 	int j;
3360 
3361 	/* disable wptr polling */
3362 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3363 
3364 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3365 	       mqd->cp_hqd_eop_base_addr_lo);
3366 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3367 	       mqd->cp_hqd_eop_base_addr_hi);
3368 
3369 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3370 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3371 	       mqd->cp_hqd_eop_control);
3372 
3373 	/* enable doorbell? */
3374 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3375 	       mqd->cp_hqd_pq_doorbell_control);
3376 
3377 	/* disable the queue if it's active */
3378 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3379 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3380 		for (j = 0; j < adev->usec_timeout; j++) {
3381 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3382 				break;
3383 			udelay(1);
3384 		}
3385 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3386 		       mqd->cp_hqd_dequeue_request);
3387 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3388 		       mqd->cp_hqd_pq_rptr);
3389 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3390 		       mqd->cp_hqd_pq_wptr_lo);
3391 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3392 		       mqd->cp_hqd_pq_wptr_hi);
3393 	}
3394 
3395 	/* set the pointer to the MQD */
3396 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3397 	       mqd->cp_mqd_base_addr_lo);
3398 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3399 	       mqd->cp_mqd_base_addr_hi);
3400 
3401 	/* set MQD vmid to 0 */
3402 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3403 	       mqd->cp_mqd_control);
3404 
3405 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3406 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3407 	       mqd->cp_hqd_pq_base_lo);
3408 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3409 	       mqd->cp_hqd_pq_base_hi);
3410 
3411 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3412 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3413 	       mqd->cp_hqd_pq_control);
3414 
3415 	/* set the wb address whether it's enabled or not */
3416 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3417 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3418 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3419 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3420 
3421 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3422 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3423 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3424 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3425 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3426 
3427 	/* enable the doorbell if requested */
3428 	if (ring->use_doorbell) {
3429 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3430 					(adev->doorbell_index.kiq * 2) << 2);
3431 		/* If the GC has entered CGPG, ringing a doorbell beyond the first page
3432 		 * doesn't wake up the GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3433 		 * work around this issue. This change has to stay aligned with the
3434 		 * firmware update.
3435 		 */
3436 		if (check_if_enlarge_doorbell_range(adev))
3437 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3438 					(adev->doorbell.size - 4));
3439 		else
3440 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3441 					(adev->doorbell_index.userqueue_end * 2) << 2);
3442 	}
3443 
3444 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3445 	       mqd->cp_hqd_pq_doorbell_control);
3446 
3447 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3448 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3449 	       mqd->cp_hqd_pq_wptr_lo);
3450 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3451 	       mqd->cp_hqd_pq_wptr_hi);
3452 
3453 	/* set the vmid for the queue */
3454 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3455 
3456 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3457 	       mqd->cp_hqd_persistent_state);
3458 
3459 	/* activate the queue */
3460 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3461 	       mqd->cp_hqd_active);
3462 
3463 	if (ring->use_doorbell)
3464 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3465 
3466 	return 0;
3467 }
3468 
3469 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3470 {
3471 	struct amdgpu_device *adev = ring->adev;
3472 	int j;
3473 
3474 	/* disable the queue if it's active */
3475 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3476 
3477 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3478 
3479 		for (j = 0; j < adev->usec_timeout; j++) {
3480 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3481 				break;
3482 			udelay(1);
3483 		}
3484 
3485 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3486 			DRM_DEBUG("KIQ dequeue request failed.\n");
3487 
3488 			/* Manual disable if dequeue request times out */
3489 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3490 		}
3491 
3492 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3493 		      0);
3494 	}
3495 
3496 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3497 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3498 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3499 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3500 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3501 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3502 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3503 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3504 
3505 	return 0;
3506 }
3507 
3508 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3509 {
3510 	struct amdgpu_device *adev = ring->adev;
3511 	struct v9_mqd *mqd = ring->mqd_ptr;
3512 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3513 	struct v9_mqd *tmp_mqd;
3514 
3515 	gfx_v9_0_kiq_setting(ring);
3516 
3517 	/* The GPU could be in a bad state during probe and the driver may trigger
3518 	 * a reset after loading the SMU; in that case the MQD has not been
3519 	 * initialized and the driver needs to re-init it.
3520 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3521 	 */
3522 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3523 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3524 		/* for the GPU_RESET case, reset the MQD to a clean status */
3525 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3526 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3527 
3528 		/* reset ring buffer */
3529 		ring->wptr = 0;
3530 		amdgpu_ring_clear_ring(ring);
3531 
3532 		mutex_lock(&adev->srbm_mutex);
3533 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3534 		gfx_v9_0_kiq_init_register(ring);
3535 		soc15_grbm_select(adev, 0, 0, 0, 0);
3536 		mutex_unlock(&adev->srbm_mutex);
3537 	} else {
3538 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3539 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3540 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3541 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3542 			amdgpu_ring_clear_ring(ring);
3543 		mutex_lock(&adev->srbm_mutex);
3544 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3545 		gfx_v9_0_mqd_init(ring);
3546 		gfx_v9_0_kiq_init_register(ring);
3547 		soc15_grbm_select(adev, 0, 0, 0, 0);
3548 		mutex_unlock(&adev->srbm_mutex);
3549 
3550 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3551 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3552 	}
3553 
3554 	return 0;
3555 }
3556 
3557 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3558 {
3559 	struct amdgpu_device *adev = ring->adev;
3560 	struct v9_mqd *mqd = ring->mqd_ptr;
3561 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3562 	struct v9_mqd *tmp_mqd;
3563 
3564 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3565 	 * mqd->cp_hqd_pq_control was not initialized before.
3566 	 */
3567 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3568 
3569 	if (!tmp_mqd->cp_hqd_pq_control ||
3570 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3571 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3572 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3573 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3574 		mutex_lock(&adev->srbm_mutex);
3575 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3576 		gfx_v9_0_mqd_init(ring);
3577 		soc15_grbm_select(adev, 0, 0, 0, 0);
3578 		mutex_unlock(&adev->srbm_mutex);
3579 
3580 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3581 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3582 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3583 		/* reset MQD to a clean status */
3584 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3585 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3586 
3587 		/* reset ring buffer */
3588 		ring->wptr = 0;
3589 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3590 		amdgpu_ring_clear_ring(ring);
3591 	} else {
3592 		amdgpu_ring_clear_ring(ring);
3593 	}
3594 
3595 	return 0;
3596 }
3597 
3598 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3599 {
3600 	struct amdgpu_ring *ring;
3601 	int r;
3602 
3603 	ring = &adev->gfx.kiq.ring;
3604 
3605 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3606 	if (unlikely(r != 0))
3607 		return r;
3608 
3609 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3610 	if (unlikely(r != 0)) {
3611 		amdgpu_bo_unreserve(ring->mqd_obj);
3612 		return r;
3613 	}
3614 
3615 	gfx_v9_0_kiq_init_queue(ring);
3616 	amdgpu_bo_kunmap(ring->mqd_obj);
3617 	ring->mqd_ptr = NULL;
3618 	amdgpu_bo_unreserve(ring->mqd_obj);
3619 	ring->sched.ready = true;
3620 	return 0;
3621 }
3622 
3623 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3624 {
3625 	struct amdgpu_ring *ring = NULL;
3626 	int r = 0, i;
3627 
3628 	gfx_v9_0_cp_compute_enable(adev, true);
3629 
3630 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3631 		ring = &adev->gfx.compute_ring[i];
3632 
3633 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3634 		if (unlikely(r != 0))
3635 			goto done;
3636 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3637 		if (!r) {
3638 			r = gfx_v9_0_kcq_init_queue(ring);
3639 			amdgpu_bo_kunmap(ring->mqd_obj);
3640 			ring->mqd_ptr = NULL;
3641 		}
3642 		amdgpu_bo_unreserve(ring->mqd_obj);
3643 		if (r)
3644 			goto done;
3645 	}
3646 
3647 	r = amdgpu_gfx_enable_kcq(adev);
3648 done:
3649 	return r;
3650 }
3651 
3652 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3653 {
3654 	int r, i;
3655 	struct amdgpu_ring *ring;
3656 
3657 	if (!(adev->flags & AMD_IS_APU))
3658 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3659 
3660 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3661 		if (adev->gfx.num_gfx_rings) {
3662 			/* legacy firmware loading */
3663 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3664 			if (r)
3665 				return r;
3666 		}
3667 
3668 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3669 		if (r)
3670 			return r;
3671 	}
3672 
3673 	r = gfx_v9_0_kiq_resume(adev);
3674 	if (r)
3675 		return r;
3676 
3677 	if (adev->gfx.num_gfx_rings) {
3678 		r = gfx_v9_0_cp_gfx_resume(adev);
3679 		if (r)
3680 			return r;
3681 	}
3682 
3683 	r = gfx_v9_0_kcq_resume(adev);
3684 	if (r)
3685 		return r;
3686 
3687 	if (adev->gfx.num_gfx_rings) {
3688 		ring = &adev->gfx.gfx_ring[0];
3689 		r = amdgpu_ring_test_helper(ring);
3690 		if (r)
3691 			return r;
3692 	}
3693 
3694 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3695 		ring = &adev->gfx.compute_ring[i];
3696 		amdgpu_ring_test_helper(ring);
3697 	}
3698 
3699 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3700 
3701 	return 0;
3702 }
3703 
3704 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3705 {
3706 	u32 tmp;
3707 
3708 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3709 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3710 		return;
3711 
3712 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3713 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3714 				adev->df.hash_status.hash_64k);
3715 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3716 				adev->df.hash_status.hash_2m);
3717 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3718 				adev->df.hash_status.hash_1g);
3719 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3720 }
3721 
3722 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3723 {
3724 	if (adev->gfx.num_gfx_rings)
3725 		gfx_v9_0_cp_gfx_enable(adev, enable);
3726 	gfx_v9_0_cp_compute_enable(adev, enable);
3727 }
3728 
3729 static int gfx_v9_0_hw_init(void *handle)
3730 {
3731 	int r;
3732 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3733 
3734 	if (!amdgpu_sriov_vf(adev))
3735 		gfx_v9_0_init_golden_registers(adev);
3736 
3737 	gfx_v9_0_constants_init(adev);
3738 
3739 	gfx_v9_0_init_tcp_config(adev);
3740 
3741 	r = adev->gfx.rlc.funcs->resume(adev);
3742 	if (r)
3743 		return r;
3744 
3745 	r = gfx_v9_0_cp_resume(adev);
3746 	if (r)
3747 		return r;
3748 
3749 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3750 		gfx_v9_4_2_set_power_brake_sequence(adev);
3751 
3752 	return r;
3753 }
3754 
3755 static int gfx_v9_0_hw_fini(void *handle)
3756 {
3757 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3758 
3759 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3760 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3761 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3762 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3763 
3764 	/* DF freeze and KCQ disable would fail after a RAS fatal-error interrupt */
3765 	if (!amdgpu_ras_intr_triggered())
3766 		/* disable the KCQ to avoid the CPC touching memory that is no longer valid */
3767 		amdgpu_gfx_disable_kcq(adev);
3768 
3769 	if (amdgpu_sriov_vf(adev)) {
3770 		gfx_v9_0_cp_gfx_enable(adev, false);
3771 		/* polling must be disabled for SRIOV when the hw is finished; otherwise
3772 		 * the CPC engine may keep fetching a WB address that is already
3773 		 * invalid after the sw is finished, triggering a DMAR read error on
3774 		 * the hypervisor side.
3775 		 */
3776 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3777 		return 0;
3778 	}
3779 
3780 	/* Use the deinitialize sequence from CAIL when unbinding the device from the
3781 	 * driver, otherwise the KIQ hangs when binding it back.
3782 	 */
3783 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3784 		mutex_lock(&adev->srbm_mutex);
3785 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3786 				adev->gfx.kiq.ring.pipe,
3787 				adev->gfx.kiq.ring.queue, 0);
3788 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3789 		soc15_grbm_select(adev, 0, 0, 0, 0);
3790 		mutex_unlock(&adev->srbm_mutex);
3791 	}
3792 
3793 	gfx_v9_0_cp_enable(adev, false);
3794 
3795 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3796 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3797 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3798 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3799 		return 0;
3800 	}
3801 
3802 	adev->gfx.rlc.funcs->stop(adev);
3803 	return 0;
3804 }
3805 
3806 static int gfx_v9_0_suspend(void *handle)
3807 {
3808 	return gfx_v9_0_hw_fini(handle);
3809 }
3810 
3811 static int gfx_v9_0_resume(void *handle)
3812 {
3813 	return gfx_v9_0_hw_init(handle);
3814 }
3815 
3816 static bool gfx_v9_0_is_idle(void *handle)
3817 {
3818 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3819 
3820 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3821 				GRBM_STATUS, GUI_ACTIVE))
3822 		return false;
3823 	else
3824 		return true;
3825 }
3826 
3827 static int gfx_v9_0_wait_for_idle(void *handle)
3828 {
3829 	unsigned i;
3830 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3831 
3832 	for (i = 0; i < adev->usec_timeout; i++) {
3833 		if (gfx_v9_0_is_idle(handle))
3834 			return 0;
3835 		udelay(1);
3836 	}
3837 	return -ETIMEDOUT;
3838 }
3839 
3840 static int gfx_v9_0_soft_reset(void *handle)
3841 {
3842 	u32 grbm_soft_reset = 0;
3843 	u32 tmp;
3844 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3845 
3846 	/* GRBM_STATUS */
3847 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3848 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3849 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3850 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3851 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3852 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3853 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3854 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3855 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3856 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3857 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3858 	}
3859 
3860 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3861 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3862 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3863 	}
3864 
3865 	/* GRBM_STATUS2 */
3866 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3867 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3868 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3869 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3870 
3871 
3872 	if (grbm_soft_reset) {
3873 		/* stop the rlc */
3874 		adev->gfx.rlc.funcs->stop(adev);
3875 
3876 		if (adev->gfx.num_gfx_rings)
3877 			/* Disable GFX parsing/prefetching */
3878 			gfx_v9_0_cp_gfx_enable(adev, false);
3879 
3880 		/* Disable MEC parsing/prefetching */
3881 		gfx_v9_0_cp_compute_enable(adev, false);
3882 
3883 		if (grbm_soft_reset) {
3884 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3885 			tmp |= grbm_soft_reset;
3886 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3887 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3888 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3889 
3890 			udelay(50);
3891 
3892 			tmp &= ~grbm_soft_reset;
3893 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3894 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3895 		}
3896 
3897 		/* Wait a little for things to settle down */
3898 		udelay(50);
3899 	}
3900 	return 0;
3901 }
3902 
3903 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3904 {
3905 	signed long r, cnt = 0;
3906 	unsigned long flags;
3907 	uint32_t seq, reg_val_offs = 0;
3908 	uint64_t value = 0;
3909 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3910 	struct amdgpu_ring *ring = &kiq->ring;
3911 
3912 	BUG_ON(!ring->funcs->emit_rreg);
3913 
3914 	spin_lock_irqsave(&kiq->ring_lock, flags);
3915 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3916 		pr_err("critical bug! too many kiq readers\n");
3917 		goto failed_unlock;
3918 	}
3919 	amdgpu_ring_alloc(ring, 32);
3920 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3921 	amdgpu_ring_write(ring, 9 |	/* src: register*/
3922 				(5 << 8) |	/* dst: memory */
3923 				(1 << 16) |	/* count sel */
3924 				(1 << 20));	/* write confirm */
3925 	amdgpu_ring_write(ring, 0);
3926 	amdgpu_ring_write(ring, 0);
3927 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3928 				reg_val_offs * 4));
3929 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3930 				reg_val_offs * 4));
3931 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3932 	if (r)
3933 		goto failed_undo;
3934 
3935 	amdgpu_ring_commit(ring);
3936 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3937 
3938 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3939 
3940 	/* don't keep waiting in the gpu reset case because doing so may
3941 	 * block the gpu_recover() routine forever; e.g. if this virt_kiq_rreg
3942 	 * is triggered from TTM, ttm_bo_lock_delayed_workqueue() will
3943 	 * never return while we keep waiting in virt_kiq_rreg, which makes
3944 	 * gpu_recover() hang there.
3945 	 *
3946 	 * also don't keep waiting in IRQ context
3947 	 */
3948 	if (r < 1 && (amdgpu_in_reset(adev)))
3949 		goto failed_kiq_read;
3950 
3951 	might_sleep();
3952 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3953 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3954 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3955 	}
3956 
3957 	if (cnt > MAX_KIQ_REG_TRY)
3958 		goto failed_kiq_read;
3959 
3960 	mb();
3961 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
3962 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
3963 	amdgpu_device_wb_free(adev, reg_val_offs);
3964 	return value;
3965 
3966 failed_undo:
3967 	amdgpu_ring_undo(ring);
3968 failed_unlock:
3969 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3970 failed_kiq_read:
3971 	if (reg_val_offs)
3972 		amdgpu_device_wb_free(adev, reg_val_offs);
3973 	pr_err("failed to read gpu clock\n");
3974 	return ~0;
3975 }
3976 
3977 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3978 {
3979 	uint64_t clock, clock_lo, clock_hi, hi_check;
3980 
3981 	switch (adev->ip_versions[GC_HWIP][0]) {
3982 	case IP_VERSION(9, 3, 0):
3983 		preempt_disable();
3984 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
3985 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
3986 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
3987 		/* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter carries
3988 		 * over roughly every 42 seconds; UPPER is re-read to detect such a carry.
3989 		 */
3990 		if (hi_check != clock_hi) {
3991 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
3992 			clock_hi = hi_check;
3993 		}
3994 		preempt_enable();
3995 		clock = clock_lo | (clock_hi << 32ULL);
3996 		break;
3997 	default:
3998 		amdgpu_gfx_off_ctrl(adev, false);
3999 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4000 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4001 			clock = gfx_v9_0_kiq_read_clock(adev);
4002 		} else {
4003 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4004 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4005 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4006 		}
4007 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4008 		amdgpu_gfx_off_ctrl(adev, true);
4009 		break;
4010 	}
4011 	return clock;
4012 }
4013 
4014 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4015 					  uint32_t vmid,
4016 					  uint32_t gds_base, uint32_t gds_size,
4017 					  uint32_t gws_base, uint32_t gws_size,
4018 					  uint32_t oa_base, uint32_t oa_size)
4019 {
4020 	struct amdgpu_device *adev = ring->adev;
4021 
4022 	/* GDS Base */
4023 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4024 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4025 				   gds_base);
4026 
4027 	/* GDS Size */
4028 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4029 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4030 				   gds_size);
4031 
4032 	/* GWS */
4033 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4034 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4035 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4036 
4037 	/* OA */
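	/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask of oa_size
	 * contiguous bits starting at bit oa_base, e.g. oa_base = 2, oa_size = 3
	 * gives 0b11100
	 */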
4038 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4039 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4040 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4041 }
4042 
4043 static const u32 vgpr_init_compute_shader[] =
4044 {
4045 	0xb07c0000, 0xbe8000ff,
4046 	0x000000f8, 0xbf110800,
4047 	0x7e000280, 0x7e020280,
4048 	0x7e040280, 0x7e060280,
4049 	0x7e080280, 0x7e0a0280,
4050 	0x7e0c0280, 0x7e0e0280,
4051 	0x80808800, 0xbe803200,
4052 	0xbf84fff5, 0xbf9c0000,
4053 	0xd28c0001, 0x0001007f,
4054 	0xd28d0001, 0x0002027e,
4055 	0x10020288, 0xb8810904,
4056 	0xb7814000, 0xd1196a01,
4057 	0x00000301, 0xbe800087,
4058 	0xbefc00c1, 0xd89c4000,
4059 	0x00020201, 0xd89cc080,
4060 	0x00040401, 0x320202ff,
4061 	0x00000800, 0x80808100,
4062 	0xbf84fff8, 0x7e020280,
4063 	0xbf810000, 0x00000000,
4064 };
4065 
4066 static const u32 sgpr_init_compute_shader[] =
4067 {
4068 	0xb07c0000, 0xbe8000ff,
4069 	0x0000005f, 0xbee50080,
4070 	0xbe812c65, 0xbe822c65,
4071 	0xbe832c65, 0xbe842c65,
4072 	0xbe852c65, 0xb77c0005,
4073 	0x80808500, 0xbf84fff8,
4074 	0xbe800080, 0xbf810000,
4075 };
4076 
4077 static const u32 vgpr_init_compute_shader_arcturus[] = {
4078 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4079 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4080 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4081 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4082 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4083 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4084 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4085 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4086 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4087 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4088 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4089 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4090 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4091 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4092 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4093 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4094 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4095 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4096 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4097 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4098 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4099 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4100 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4101 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4102 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4103 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4104 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4105 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4106 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4107 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4108 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4109 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4110 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4111 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4112 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4113 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4114 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4115 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4116 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4117 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4118 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4119 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4120 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4121 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4122 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4123 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4124 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4125 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4126 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4127 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4128 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4129 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4130 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4131 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4132 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4133 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4134 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4135 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4136 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4137 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4138 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4139 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4140 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4141 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4142 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4143 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4144 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4145 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4146 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4147 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4148 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4149 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4150 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4151 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4152 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4153 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4154 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4155 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4156 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4157 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4158 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4159 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4160 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4161 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4162 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4163 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4164 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4165 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4166 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4167 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4168 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4169 	0xbf84fff8, 0xbf810000,
4170 };
4171 
4172 /* When the register arrays below are changed, please update gpr_reg_size
4173    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4174    to cover all gfx9 ASICs. */
4175 static const struct soc15_reg_entry vgpr_init_regs[] = {
4176    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4177    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4178    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4179    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4180    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4181    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4182    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4183    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4184    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4185    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4186    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4187    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4188    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4189    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4190 };
4191 
4192 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4193    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4194    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4195    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4196    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4197    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4198    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4199    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4200    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4201    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4202    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4203    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4204    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4205    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4206    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4207 };
4208 
4209 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4210    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4211    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4212    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4213    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4214    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4215    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4216    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4217    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4218    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4219    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4220    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4221    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4222    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4223    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4224 };
4225 
4226 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4227    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4228    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4229    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4230    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4231    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4232    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4233    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4234    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4235    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4236    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4237    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4238    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4239    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4240    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4241 };
4242 
4243 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4244    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4245    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4246    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4247    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4248    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4249    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4250    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4251    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4252    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4253    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4254    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4255    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4256    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4257    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4258    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4259    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4260    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4261    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4262    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4263    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4264    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4265    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4266    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4267    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4268    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4269    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4270    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4271    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4272    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4273    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4274    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4275    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4276    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4277 };
4278 
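/*
 * GDS EDC workaround: map the whole GDS for VMID0, DMA-write zeros across
 * it from the first compute ring and busy-wait for the ring to drain, so
 * the GDS memory is written once before use. Only run when GFX RAS is
 * supported.
 */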
4279 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4280 {
4281 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4282 	int i, r;
4283 
4284 	/* only supported when RAS is enabled */
4285 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4286 		return 0;
4287 
4288 	r = amdgpu_ring_alloc(ring, 7);
4289 	if (r) {
4290 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4291 			ring->name, r);
4292 		return r;
4293 	}
4294 
4295 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4296 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4297 
4298 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4299 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4300 				PACKET3_DMA_DATA_DST_SEL(1) |
4301 				PACKET3_DMA_DATA_SRC_SEL(2) |
4302 				PACKET3_DMA_DATA_ENGINE(0)));
4303 	amdgpu_ring_write(ring, 0);
4304 	amdgpu_ring_write(ring, 0);
4305 	amdgpu_ring_write(ring, 0);
4306 	amdgpu_ring_write(ring, 0);
4307 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4308 				adev->gds.gds_size);
4309 
4310 	amdgpu_ring_commit(ring);
4311 
4312 	for (i = 0; i < adev->usec_timeout; i++) {
4313 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4314 			break;
4315 		udelay(1);
4316 	}
4317 
4318 	if (i >= adev->usec_timeout)
4319 		r = -ETIMEDOUT;
4320 
4321 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4322 
4323 	return r;
4324 }
4325 
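/*
 * GPR EDC workaround: build a single IB with three compute dispatches
 * (one VGPR-init shader and two SGPR-init shaders) that touch the GPR
 * banks on every SE, submit it on the first compute ring and wait for the
 * fence. Only run when GFX RAS is supported and the compute ring is ready.
 */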
4326 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4327 {
4328 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4329 	struct amdgpu_ib ib;
4330 	struct dma_fence *f = NULL;
4331 	int r, i;
4332 	unsigned total_size, vgpr_offset, sgpr_offset;
4333 	u64 gpu_addr;
4334 
4335 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4336 						adev->gfx.config.max_cu_per_sh *
4337 						adev->gfx.config.max_sh_per_se;
4338 	int sgpr_work_group_size = 5;
4339 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4340 	int vgpr_init_shader_size;
4341 	const u32 *vgpr_init_shader_ptr;
4342 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4343 
4344 	/* only supported when RAS is enabled */
4345 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4346 		return 0;
4347 
4348 	/* bail if the compute ring is not ready */
4349 	if (!ring->sched.ready)
4350 		return 0;
4351 
4352 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4353 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4354 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4355 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4356 	} else {
4357 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4358 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4359 		vgpr_init_regs_ptr = vgpr_init_regs;
4360 	}
4361 
4362 	total_size =
4363 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4364 	total_size +=
4365 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4366 	total_size +=
4367 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4368 	total_size = ALIGN(total_size, 256);
4369 	vgpr_offset = total_size;
4370 	total_size += ALIGN(vgpr_init_shader_size, 256);
4371 	sgpr_offset = total_size;
4372 	total_size += sizeof(sgpr_init_compute_shader);
4373 
4374 	/* allocate an indirect buffer to put the commands in */
4375 	memset(&ib, 0, sizeof(ib));
4376 	r = amdgpu_ib_get(adev, NULL, total_size,
4377 					AMDGPU_IB_POOL_DIRECT, &ib);
4378 	if (r) {
4379 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4380 		return r;
4381 	}
4382 
4383 	/* load the compute shaders */
4384 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4385 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4386 
4387 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4388 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4389 
4390 	/* init the ib length to 0 */
4391 	ib.length_dw = 0;
4392 
4393 	/* VGPR */
4394 	/* write the register state for the compute dispatch */
4395 	for (i = 0; i < gpr_reg_size; i++) {
4396 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4397 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4398 								- PACKET3_SET_SH_REG_START;
4399 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4400 	}
4401 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4402 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4403 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4404 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4405 							- PACKET3_SET_SH_REG_START;
4406 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4407 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4408 
4409 	/* write dispatch packet */
4410 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4411 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4412 	ib.ptr[ib.length_dw++] = 1; /* y */
4413 	ib.ptr[ib.length_dw++] = 1; /* z */
4414 	ib.ptr[ib.length_dw++] =
4415 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4416 
4417 	/* write CS partial flush packet */
4418 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4419 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4420 
4421 	/* SGPR1 */
4422 	/* write the register state for the compute dispatch */
4423 	for (i = 0; i < gpr_reg_size; i++) {
4424 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4425 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4426 								- PACKET3_SET_SH_REG_START;
4427 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4428 	}
4429 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4430 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4431 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4432 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4433 							- PACKET3_SET_SH_REG_START;
4434 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4435 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4436 
4437 	/* write dispatch packet */
4438 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4439 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4440 	ib.ptr[ib.length_dw++] = 1; /* y */
4441 	ib.ptr[ib.length_dw++] = 1; /* z */
4442 	ib.ptr[ib.length_dw++] =
4443 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4444 
4445 	/* write CS partial flush packet */
4446 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4447 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4448 
4449 	/* SGPR2 */
4450 	/* write the register state for the compute dispatch */
4451 	for (i = 0; i < gpr_reg_size; i++) {
4452 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4453 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4454 								- PACKET3_SET_SH_REG_START;
4455 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4456 	}
4457 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4458 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4459 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4460 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4461 							- PACKET3_SET_SH_REG_START;
4462 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4463 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4464 
4465 	/* write dispatch packet */
4466 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4467 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4468 	ib.ptr[ib.length_dw++] = 1; /* y */
4469 	ib.ptr[ib.length_dw++] = 1; /* z */
4470 	ib.ptr[ib.length_dw++] =
4471 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4472 
4473 	/* write CS partial flush packet */
4474 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4475 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4476 
4477 	/* schedule the IB on the ring */
4478 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4479 	if (r) {
4480 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4481 		goto fail;
4482 	}
4483 
4484 	/* wait for the GPU to finish processing the IB */
4485 	r = dma_fence_wait(f, false);
4486 	if (r) {
4487 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4488 		goto fail;
4489 	}
4490 
4491 fail:
4492 	amdgpu_ib_free(adev, &ib, NULL);
4493 	dma_fence_put(f);
4494 
4495 	return r;
4496 }
4497 
4498 static int gfx_v9_0_early_init(void *handle)
4499 {
4500 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4501 
4502 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4503 
4504 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4505 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4506 		adev->gfx.num_gfx_rings = 0;
4507 	else
4508 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4509 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4510 					  AMDGPU_MAX_COMPUTE_RINGS);
4511 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4512 	gfx_v9_0_set_ring_funcs(adev);
4513 	gfx_v9_0_set_irq_funcs(adev);
4514 	gfx_v9_0_set_gds_init(adev);
4515 	gfx_v9_0_set_rlc_funcs(adev);
4516 
4517 	/* init rlcg reg access ctrl */
4518 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4519 
4520 	return gfx_v9_0_init_microcode(adev);
4521 }
4522 
4523 static int gfx_v9_0_ecc_late_init(void *handle)
4524 {
4525 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4526 	int r;
4527 
4528 	/*
4529 	 * Temp workaround for an issue where CP firmware fails to update
4530 	 * the read pointer while CPDMA is writing the GDS clearing
4531 	 * operation during the suspend/resume sequence on several cards,
4532 	 * so limit this operation to the cold boot sequence.
4533 	 */
4534 	if ((!adev->in_suspend) &&
4535 	    (adev->gds.gds_size)) {
4536 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4537 		if (r)
4538 			return r;
4539 	}
4540 
4541 	/* requires IBs so do in late init after IB pool is initialized */
4542 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4543 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4544 	else
4545 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4546 
4547 	if (r)
4548 		return r;
4549 
4550 	if (adev->gfx.ras &&
4551 	    adev->gfx.ras->enable_watchdog_timer)
4552 		adev->gfx.ras->enable_watchdog_timer(adev);
4553 
4554 	return 0;
4555 }
4556 
4557 static int gfx_v9_0_late_init(void *handle)
4558 {
4559 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4560 	int r;
4561 
4562 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4563 	if (r)
4564 		return r;
4565 
4566 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4567 	if (r)
4568 		return r;
4569 
4570 	r = gfx_v9_0_ecc_late_init(handle);
4571 	if (r)
4572 		return r;
4573 
4574 	return 0;
4575 }
4576 
4577 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4578 {
4579 	uint32_t rlc_setting;
4580 
4581 	/* if RLC is not enabled, do nothing */
4582 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4583 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4584 		return false;
4585 
4586 	return true;
4587 }
4588 
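/* Ask the RLC to enter safe mode and wait for it to ack the command. */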
4589 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4590 {
4591 	uint32_t data;
4592 	unsigned i;
4593 
4594 	data = RLC_SAFE_MODE__CMD_MASK;
4595 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4596 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4597 
4598 	/* wait for RLC_SAFE_MODE */
4599 	for (i = 0; i < adev->usec_timeout; i++) {
4600 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4601 			break;
4602 		udelay(1);
4603 	}
4604 }
4605 
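/* Ask the RLC to leave safe mode; no acknowledgement is polled here. */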
4606 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4607 {
4608 	uint32_t data;
4609 
4610 	data = RLC_SAFE_MODE__CMD_MASK;
4611 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4612 }
4613 
4614 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4615 						bool enable)
4616 {
4617 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4618 
4619 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4620 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4621 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4622 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4623 	} else {
4624 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4625 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4626 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4627 	}
4628 
4629 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4630 }
4631 
4632 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4633 						bool enable)
4634 {
4635 	/* TODO: double check if we need to perform under safe mode */
4636 	/* TODO: double check if we need to perform this under safe mode */
4637 
4638 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4639 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4640 	else
4641 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4642 
4643 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4644 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4645 	else
4646 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4647 
4648 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4649 }
4650 
4651 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4652 						      bool enable)
4653 {
4654 	uint32_t data, def;
4655 
4656 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4657 
4658 	/* It is disabled by HW by default */
4659 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4660 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4661 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4662 
4663 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4664 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4665 
4666 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4667 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4668 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4669 
4670 		/* only for Vega10 & Raven1 */
4671 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4672 
4673 		if (def != data)
4674 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4675 
4676 		/* MGLS is a global flag to control all MGLS in GFX */
4677 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4678 			/* 2 - RLC memory Light sleep */
4679 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4680 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4681 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4682 				if (def != data)
4683 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4684 			}
4685 			/* 3 - CP memory Light sleep */
4686 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4687 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4688 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4689 				if (def != data)
4690 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4691 			}
4692 		}
4693 	} else {
4694 		/* 1 - MGCG_OVERRIDE */
4695 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4696 
4697 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4698 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4699 
4700 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4701 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4702 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4703 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4704 
4705 		if (def != data)
4706 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4707 
4708 		/* 2 - disable MGLS in RLC */
4709 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4710 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4711 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4712 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4713 		}
4714 
4715 		/* 3 - disable MGLS in CP */
4716 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4717 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4718 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4719 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4720 		}
4721 	}
4722 
4723 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4724 }
4725 
4726 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4727 					   bool enable)
4728 {
4729 	uint32_t data, def;
4730 
4731 	if (!adev->gfx.num_gfx_rings)
4732 		return;
4733 
4734 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4735 
4736 	/* Enable 3D CGCG/CGLS */
4737 	if (enable) {
4738 		/* write cmd to clear cgcg/cgls ov */
4739 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4740 		/* unset CGCG override */
4741 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4742 		/* update CGCG and CGLS override bits */
4743 		if (def != data)
4744 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4745 
4746 		/* enable 3Dcgcg FSM(0x0000363f) */
4747 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4748 
4749 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4750 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4751 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4752 		else
4753 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4754 
4755 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4756 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4757 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4758 		if (def != data)
4759 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4760 
4761 		/* set IDLE_POLL_COUNT(0x00900100) */
4762 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4763 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4764 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4765 		if (def != data)
4766 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4767 	} else {
4768 		/* Disable CGCG/CGLS */
4769 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4770 		/* disable cgcg, cgls should be disabled */
4771 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4772 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4773 		/* disable cgcg and cgls in FSM */
4774 		if (def != data)
4775 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4776 	}
4777 
4778 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4779 }
4780 
4781 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4782 						      bool enable)
4783 {
4784 	uint32_t def, data;
4785 
4786 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4787 
4788 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4789 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4790 		/* unset CGCG override */
4791 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4792 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4793 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4794 		else
4795 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4796 		/* update CGCG and CGLS override bits */
4797 		if (def != data)
4798 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4799 
4800 		/* enable cgcg FSM(0x0000363F) */
4801 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4802 
4803 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4804 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4805 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4806 		else
4807 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4808 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4809 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4810 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4811 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4812 		if (def != data)
4813 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4814 
4815 		/* set IDLE_POLL_COUNT(0x00900100) */
4816 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4817 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4818 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4819 		if (def != data)
4820 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4821 	} else {
4822 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4823 		/* reset CGCG/CGLS bits */
4824 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4825 		/* disable cgcg and cgls in FSM */
4826 		if (def != data)
4827 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4828 	}
4829 
4830 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4831 }
4832 
4833 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4834 					    bool enable)
4835 {
4836 	if (enable) {
4837 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4838 		 * ===  MGCG + MGLS ===
4839 		 */
4840 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4841 		/* ===  CGCG /CGLS for GFX 3D Only === */
4842 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4843 		/* ===  CGCG + CGLS === */
4844 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4845 	} else {
4846 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4847 		 * ===  CGCG + CGLS ===
4848 		 */
4849 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4850 		/* ===  CGCG /CGLS for GFX 3D Only === */
4851 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4852 		/* ===  MGCG + MGLS === */
4853 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4854 	}
4855 	return 0;
4856 }
4857 
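/*
 * Program the VMID used for RLC SPM (streaming performance monitor)
 * accesses; GFXOFF is temporarily disabled around the register access.
 */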
4858 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4859 {
4860 	u32 reg, data;
4861 
4862 	amdgpu_gfx_off_ctrl(adev, false);
4863 
4864 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4865 	if (amdgpu_sriov_is_pp_one_vf(adev))
4866 		data = RREG32_NO_KIQ(reg);
4867 	else
4868 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4869 
4870 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4871 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4872 
4873 	if (amdgpu_sriov_is_pp_one_vf(adev))
4874 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4875 	else
4876 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4877 
4878 	amdgpu_gfx_off_ctrl(adev, true);
4879 }
4880 
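/* Return true if @offset matches one of the registers in @entries. */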
4881 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4882 					uint32_t offset,
4883 					struct soc15_reg_rlcg *entries, int arr_size)
4884 {
4885 	int i;
4886 	uint32_t reg;
4887 
4888 	if (!entries)
4889 		return false;
4890 
4891 	for (i = 0; i < arr_size; i++) {
4892 		const struct soc15_reg_rlcg *entry;
4893 
4894 		entry = &entries[i];
4895 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4896 		if (offset == reg)
4897 			return true;
4898 	}
4899 
4900 	return false;
4901 }
4902 
4903 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4904 {
4905 	return gfx_v9_0_check_rlcg_range(adev, offset,
4906 					(void *)rlcg_access_gc_9_0,
4907 					ARRAY_SIZE(rlcg_access_gc_9_0));
4908 }
4909 
4910 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4911 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4912 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4913 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4914 	.init = gfx_v9_0_rlc_init,
4915 	.get_csb_size = gfx_v9_0_get_csb_size,
4916 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4917 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4918 	.resume = gfx_v9_0_rlc_resume,
4919 	.stop = gfx_v9_0_rlc_stop,
4920 	.reset = gfx_v9_0_rlc_reset,
4921 	.start = gfx_v9_0_rlc_start,
4922 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4923 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4924 };
4925 
4926 static int gfx_v9_0_set_powergating_state(void *handle,
4927 					  enum amd_powergating_state state)
4928 {
4929 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4930 	bool enable = (state == AMD_PG_STATE_GATE);
4931 
4932 	switch (adev->ip_versions[GC_HWIP][0]) {
4933 	case IP_VERSION(9, 2, 2):
4934 	case IP_VERSION(9, 1, 0):
4935 	case IP_VERSION(9, 3, 0):
4936 		if (!enable)
4937 			amdgpu_gfx_off_ctrl(adev, false);
4938 
4939 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4940 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4941 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4942 		} else {
4943 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4944 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4945 		}
4946 
4947 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4948 			gfx_v9_0_enable_cp_power_gating(adev, true);
4949 		else
4950 			gfx_v9_0_enable_cp_power_gating(adev, false);
4951 
4952 		/* update gfx cgpg state */
4953 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4954 
4955 		/* update mgcg state */
4956 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4957 
4958 		if (enable)
4959 			amdgpu_gfx_off_ctrl(adev, true);
4960 		break;
4961 	case IP_VERSION(9, 2, 1):
4962 		amdgpu_gfx_off_ctrl(adev, enable);
4963 		break;
4964 	default:
4965 		break;
4966 	}
4967 
4968 	return 0;
4969 }
4970 
4971 static int gfx_v9_0_set_clockgating_state(void *handle,
4972 					  enum amd_clockgating_state state)
4973 {
4974 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4975 
4976 	if (amdgpu_sriov_vf(adev))
4977 		return 0;
4978 
4979 	switch (adev->ip_versions[GC_HWIP][0]) {
4980 	case IP_VERSION(9, 0, 1):
4981 	case IP_VERSION(9, 2, 1):
4982 	case IP_VERSION(9, 4, 0):
4983 	case IP_VERSION(9, 2, 2):
4984 	case IP_VERSION(9, 1, 0):
4985 	case IP_VERSION(9, 4, 1):
4986 	case IP_VERSION(9, 3, 0):
4987 	case IP_VERSION(9, 4, 2):
4988 		gfx_v9_0_update_gfx_clock_gating(adev,
4989 						 state == AMD_CG_STATE_GATE);
4990 		break;
4991 	default:
4992 		break;
4993 	}
4994 	return 0;
4995 }
4996 
4997 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
4998 {
4999 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5000 	int data;
5001 
5002 	if (amdgpu_sriov_vf(adev))
5003 		*flags = 0;
5004 
5005 	/* AMD_CG_SUPPORT_GFX_MGCG */
5006 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5007 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5008 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5009 
5010 	/* AMD_CG_SUPPORT_GFX_CGCG */
5011 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5012 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5013 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5014 
5015 	/* AMD_CG_SUPPORT_GFX_CGLS */
5016 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5017 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5018 
5019 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5020 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5021 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5022 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5023 
5024 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5025 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5026 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5027 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5028 
5029 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5030 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5031 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5032 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5033 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5034 
5035 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5036 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5037 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5038 	}
5039 }
5040 
5041 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5042 {
5043 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5044 }
5045 
5046 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5047 {
5048 	struct amdgpu_device *adev = ring->adev;
5049 	u64 wptr;
5050 
5051 	/* XXX check if swapping is necessary on BE */
5052 	if (ring->use_doorbell) {
5053 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5054 	} else {
5055 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5056 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5057 	}
5058 
5059 	return wptr;
5060 }
5061 
5062 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5063 {
5064 	struct amdgpu_device *adev = ring->adev;
5065 
5066 	if (ring->use_doorbell) {
5067 		/* XXX check if swapping is necessary on BE */
5068 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5069 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5070 	} else {
5071 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5072 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5073 	}
5074 }
5075 
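/*
 * Emit an HDP flush: write this engine's bit to the NBIO HDP flush request
 * register and wait for the matching bit in the done register.
 */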
5076 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5077 {
5078 	struct amdgpu_device *adev = ring->adev;
5079 	u32 ref_and_mask, reg_mem_engine;
5080 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5081 
5082 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5083 		switch (ring->me) {
5084 		case 1:
5085 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5086 			break;
5087 		case 2:
5088 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5089 			break;
5090 		default:
5091 			return;
5092 		}
5093 		reg_mem_engine = 0;
5094 	} else {
5095 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5096 		reg_mem_engine = 1; /* pfp */
5097 	}
5098 
5099 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5100 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5101 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5102 			      ref_and_mask, ref_and_mask, 0x20);
5103 }
5104 
5105 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5106 					struct amdgpu_job *job,
5107 					struct amdgpu_ib *ib,
5108 					uint32_t flags)
5109 {
5110 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5111 	u32 header, control = 0;
5112 
5113 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5114 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5115 	else
5116 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5117 
5118 	control |= ib->length_dw | (vmid << 24);
5119 
5120 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5121 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5122 
5123 		if (flags & AMDGPU_IB_PREEMPTED)
5124 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5125 
5126 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5127 			gfx_v9_0_ring_emit_de_meta(ring,
5128 						   (!amdgpu_sriov_vf(ring->adev) &&
5129 						   flags & AMDGPU_IB_PREEMPTED) ?
5130 						   true : false,
5131 						   job->gds_size > 0 && job->gds_base != 0);
5132 	}
5133 
5134 	amdgpu_ring_write(ring, header);
5135 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5136 	amdgpu_ring_write(ring,
5137 #ifdef __BIG_ENDIAN
5138 		(2 << 0) |
5139 #endif
5140 		lower_32_bits(ib->gpu_addr));
5141 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5142 	amdgpu_ring_ib_on_emit_cntl(ring);
5143 	amdgpu_ring_write(ring, control);
5144 }
5145 
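/* Patch a previously emitted IB control dword to mark a preemption resume. */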
5146 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5147 				     unsigned offset)
5148 {
5149 	u32 control = ring->ring[offset];
5150 
5151 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5152 	ring->ring[offset] = control;
5153 }
5154 
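/*
 * Copy the saved CE metadata payload back into the ring at @offset when a
 * preempted IB is resumed, splitting the copy if it wraps around the ring.
 */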
5155 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5156 					unsigned offset)
5157 {
5158 	struct amdgpu_device *adev = ring->adev;
5159 	void *ce_payload_cpu_addr;
5160 	uint64_t payload_offset, payload_size;
5161 
5162 	payload_size = sizeof(struct v9_ce_ib_state);
5163 
5164 	if (ring->is_mes_queue) {
5165 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5166 					  gfx[0].gfx_meta_data) +
5167 			offsetof(struct v9_gfx_meta_data, ce_payload);
5168 		ce_payload_cpu_addr =
5169 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5170 	} else {
5171 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5172 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5173 	}
5174 
5175 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5176 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5177 	} else {
5178 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5179 		       (ring->buf_mask + 1 - offset) << 2);
5180 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5181 		memcpy((void *)&ring->ring[0],
5182 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5183 		       payload_size);
5184 	}
5185 }
5186 
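/* Same as the CE variant above, but for the DE metadata payload. */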
5187 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5188 					unsigned offset)
5189 {
5190 	struct amdgpu_device *adev = ring->adev;
5191 	void *de_payload_cpu_addr;
5192 	uint64_t payload_offset, payload_size;
5193 
5194 	payload_size = sizeof(struct v9_de_ib_state);
5195 
5196 	if (ring->is_mes_queue) {
5197 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5198 					  gfx[0].gfx_meta_data) +
5199 			offsetof(struct v9_gfx_meta_data, de_payload);
5200 		de_payload_cpu_addr =
5201 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5202 	} else {
5203 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5204 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5205 	}
5206 
5207 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5208 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5209 	} else {
5210 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5211 		       (ring->buf_mask + 1 - offset) << 2);
5212 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5213 		memcpy((void *)&ring->ring[0],
5214 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5215 		       payload_size);
5216 	}
5217 }
5218 
5219 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5220 					  struct amdgpu_job *job,
5221 					  struct amdgpu_ib *ib,
5222 					  uint32_t flags)
5223 {
5224 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5225 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5226 
5227 	/* Currently there is a high probability of a wave ID mismatch
5228 	 * between ME and GDS, leading to a HW deadlock, because ME generates
5229 	 * different wave IDs than the GDS expects. This situation happens
5230 	 * randomly when at least 5 compute pipes use GDS ordered append.
5231 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5232 	 * Those are probably bugs somewhere else in the kernel driver.
5233 	 *
5234 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5235 	 * GDS to 0 for this ring (me/pipe).
5236 	 */
5237 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5238 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5239 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5240 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5241 	}
5242 
5243 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5244 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5245 	amdgpu_ring_write(ring,
5246 #ifdef __BIG_ENDIAN
5247 				(2 << 0) |
5248 #endif
5249 				lower_32_bits(ib->gpu_addr));
5250 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5251 	amdgpu_ring_write(ring, control);
5252 }
5253 
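/*
 * Emit a RELEASE_MEM fence: flush/invalidate the GPU caches, write the
 * 32- or 64-bit fence sequence number to @addr and optionally raise an
 * EOP interrupt.
 */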
5254 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5255 				     u64 seq, unsigned flags)
5256 {
5257 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5258 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5259 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5260 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5261 	uint32_t dw2 = 0;
5262 
5263 	/* RELEASE_MEM - flush caches, send int */
5264 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5265 
5266 	if (writeback) {
5267 		dw2 = EOP_TC_NC_ACTION_EN;
5268 	} else {
5269 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5270 				EOP_TC_MD_ACTION_EN;
5271 	}
5272 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5273 				EVENT_INDEX(5);
5274 	if (exec)
5275 		dw2 |= EOP_EXEC;
5276 
5277 	amdgpu_ring_write(ring, dw2);
5278 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5279 
5280 	/*
5281 	 * The address must be Qword aligned for a 64-bit write and Dword
5282 	 * aligned if only the low 32 bits are sent (data high is discarded).
5283 	 */
5284 	if (write64bit)
5285 		BUG_ON(addr & 0x7);
5286 	else
5287 		BUG_ON(addr & 0x3);
5288 	amdgpu_ring_write(ring, lower_32_bits(addr));
5289 	amdgpu_ring_write(ring, upper_32_bits(addr));
5290 	amdgpu_ring_write(ring, lower_32_bits(seq));
5291 	amdgpu_ring_write(ring, upper_32_bits(seq));
5292 	amdgpu_ring_write(ring, 0);
5293 }
5294 
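/* Make the CP (PFP on GFX rings) wait until the last synced fence sequence
 * number has signalled before executing further packets.
 */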
5295 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5296 {
5297 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5298 	uint32_t seq = ring->fence_drv.sync_seq;
5299 	uint64_t addr = ring->fence_drv.gpu_addr;
5300 
5301 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5302 			      lower_32_bits(addr), upper_32_bits(addr),
5303 			      seq, 0xffffffff, 4);
5304 }
5305 
5306 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5307 					unsigned vmid, uint64_t pd_addr)
5308 {
5309 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5310 
5311 	/* compute doesn't have PFP */
5312 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5313 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5314 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5315 		amdgpu_ring_write(ring, 0x0);
5316 	}
5317 }
5318 
5319 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5320 {
5321 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5322 }
5323 
5324 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5325 {
5326 	u64 wptr;
5327 
5328 	/* XXX check if swapping is necessary on BE */
5329 	if (ring->use_doorbell)
5330 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5331 	else
5332 		BUG();
5333 	return wptr;
5334 }
5335 
5336 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5337 {
5338 	struct amdgpu_device *adev = ring->adev;
5339 
5340 	/* XXX check if swapping is necessary on BE */
5341 	if (ring->use_doorbell) {
5342 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5343 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5344 	} else {
5345 		BUG(); /* only DOORBELL method supported on gfx9 now */
5346 	}
5347 }
5348 
5349 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5350 					 u64 seq, unsigned int flags)
5351 {
5352 	struct amdgpu_device *adev = ring->adev;
5353 
5354 	/* we only allocate 32bit for each seq wb address */
5355 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5356 
5357 	/* write fence seq to the "addr" */
5358 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5359 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5360 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5361 	amdgpu_ring_write(ring, lower_32_bits(addr));
5362 	amdgpu_ring_write(ring, upper_32_bits(addr));
5363 	amdgpu_ring_write(ring, lower_32_bits(seq));
5364 
5365 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5366 		/* set register to trigger INT */
5367 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5368 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5369 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5370 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5371 		amdgpu_ring_write(ring, 0);
5372 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5373 	}
5374 }
5375 
5376 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5377 {
5378 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5379 	amdgpu_ring_write(ring, 0);
5380 }
5381 
5382 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5383 {
5384 	struct amdgpu_device *adev = ring->adev;
5385 	struct v9_ce_ib_state ce_payload = {0};
5386 	uint64_t offset, ce_payload_gpu_addr;
5387 	void *ce_payload_cpu_addr;
5388 	int cnt;
5389 
5390 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5391 
5392 	if (ring->is_mes_queue) {
5393 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5394 				  gfx[0].gfx_meta_data) +
5395 			offsetof(struct v9_gfx_meta_data, ce_payload);
5396 		ce_payload_gpu_addr =
5397 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5398 		ce_payload_cpu_addr =
5399 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5400 	} else {
5401 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5402 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5403 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5404 	}
5405 
5406 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5407 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5408 				 WRITE_DATA_DST_SEL(8) |
5409 				 WR_CONFIRM) |
5410 				 WRITE_DATA_CACHE_POLICY(0));
5411 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5412 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5413 
5414 	amdgpu_ring_ib_on_emit_ce(ring);
5415 
5416 	if (resume)
5417 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5418 					   sizeof(ce_payload) >> 2);
5419 	else
5420 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5421 					   sizeof(ce_payload) >> 2);
5422 }
5423 
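/*
 * Mid-command-buffer preemption: emit a trailing fence on the ring, ask
 * the KIQ to preempt the queue without unmapping it, poll for the trailing
 * fence to signal, then clear CP_VMID_PREEMPT and deassert the preemption
 * condition.
 */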
5424 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5425 {
5426 	int i, r = 0;
5427 	struct amdgpu_device *adev = ring->adev;
5428 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5429 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5430 	unsigned long flags;
5431 
5432 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5433 		return -EINVAL;
5434 
5435 	spin_lock_irqsave(&kiq->ring_lock, flags);
5436 
5437 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5438 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5439 		return -ENOMEM;
5440 	}
5441 
5442 	/* assert preemption condition */
5443 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5444 
5445 	ring->trail_seq += 1;
5446 	amdgpu_ring_alloc(ring, 13);
5447 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5448 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5449 
5450 	/* assert IB preemption, emit the trailing fence */
5451 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5452 				   ring->trail_fence_gpu_addr,
5453 				   ring->trail_seq);
5454 
5455 	amdgpu_ring_commit(kiq_ring);
5456 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5457 
5458 	/* poll the trailing fence */
5459 	for (i = 0; i < adev->usec_timeout; i++) {
5460 		if (ring->trail_seq ==
5461 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5462 			break;
5463 		udelay(1);
5464 	}
5465 
5466 	if (i >= adev->usec_timeout) {
5467 		r = -EINVAL;
5468 		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5469 	}
5470 
5471 	/* reset the CP_VMID_PREEMPT after trailing fence */
5472 	amdgpu_ring_emit_wreg(ring,
5473 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5474 			      0x0);
5475 	amdgpu_ring_commit(ring);
5476 
5477 	/* deassert preemption condition */
5478 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5479 	return r;
5480 }
5481 
5482 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5483 {
5484 	struct amdgpu_device *adev = ring->adev;
5485 	struct v9_de_ib_state de_payload = {0};
5486 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5487 	void *de_payload_cpu_addr;
5488 	int cnt;
5489 
5490 	if (ring->is_mes_queue) {
5491 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5492 				  gfx[0].gfx_meta_data) +
5493 			offsetof(struct v9_gfx_meta_data, de_payload);
5494 		de_payload_gpu_addr =
5495 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5496 		de_payload_cpu_addr =
5497 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5498 
5499 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5500 				  gfx[0].gds_backup) +
5501 			offsetof(struct v9_gfx_meta_data, de_payload);
5502 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5503 	} else {
5504 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5505 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5506 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5507 
5508 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5509 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5510 				 PAGE_SIZE);
5511 	}
5512 
5513 	if (usegds) {
5514 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5515 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5516 	}
5517 
5518 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5519 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5520 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5521 				 WRITE_DATA_DST_SEL(8) |
5522 				 WR_CONFIRM) |
5523 				 WRITE_DATA_CACHE_POLICY(0));
5524 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5525 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5526 
5527 	amdgpu_ring_ib_on_emit_de(ring);
5528 	if (resume)
5529 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5530 					   sizeof(de_payload) >> 2);
5531 	else
5532 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5533 					   sizeof(de_payload) >> 2);
5534 }
5535 
5536 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5537 				   bool secure)
5538 {
5539 	uint32_t v = secure ? FRAME_TMZ : 0;
5540 
5541 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5542 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5543 }
5544 
5545 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5546 {
5547 	uint32_t dw2 = 0;
5548 
5549 	gfx_v9_0_ring_emit_ce_meta(ring,
5550 				   (!amdgpu_sriov_vf(ring->adev) &&
5551 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5552 
5553 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5554 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5555 		/* set load_global_config & load_global_uconfig */
5556 		dw2 |= 0x8001;
5557 		/* set load_cs_sh_regs */
5558 		dw2 |= 0x01000000;
5559 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5560 		dw2 |= 0x10002;
5561 
5562 		/* set load_ce_ram if preamble presented */
5563 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5564 			dw2 |= 0x10000000;
5565 	} else {
5566 		/* still load_ce_ram if this is the first time the preamble is
5567 		 * presented, even though no context switch happens.
5568 		 */
5569 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5570 			dw2 |= 0x10000000;
5571 	}
5572 
5573 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5574 	amdgpu_ring_write(ring, dw2);
5575 	amdgpu_ring_write(ring, 0);
5576 }
5577 
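/*
 * Emit a COND_EXEC packet with a placeholder dword count and return its
 * position in the ring so gfx_v9_0_ring_emit_patch_cond_exec() can fix it
 * up once the size of the conditional block is known.
 */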
5578 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5579 {
5580 	unsigned ret;
5581 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5582 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5583 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5584 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5585 	ret = ring->wptr & ring->buf_mask;
5586 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5587 	return ret;
5588 }
5589 
5590 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5591 {
5592 	unsigned cur;
5593 	BUG_ON(offset > ring->buf_mask);
5594 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5595 
5596 	cur = (ring->wptr - 1) & ring->buf_mask;
5597 	if (likely(cur > offset))
5598 		ring->ring[offset] = cur - offset;
5599 	else
5600 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5601 }
5602 
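/* Emit a COPY_DATA packet that copies @reg into the writeback buffer slot. */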
5603 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5604 				    uint32_t reg_val_offs)
5605 {
5606 	struct amdgpu_device *adev = ring->adev;
5607 
5608 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5609 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5610 				(5 << 8) |	/* dst: memory */
5611 				(1 << 20));	/* write confirm */
5612 	amdgpu_ring_write(ring, reg);
5613 	amdgpu_ring_write(ring, 0);
5614 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5615 				reg_val_offs * 4));
5616 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5617 				reg_val_offs * 4));
5618 }
5619 
5620 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5621 				    uint32_t val)
5622 {
5623 	uint32_t cmd = 0;
5624 
5625 	switch (ring->funcs->type) {
5626 	case AMDGPU_RING_TYPE_GFX:
5627 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5628 		break;
5629 	case AMDGPU_RING_TYPE_KIQ:
5630 		cmd = (1 << 16); /* no inc addr */
5631 		break;
5632 	default:
5633 		cmd = WR_CONFIRM;
5634 		break;
5635 	}
5636 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5637 	amdgpu_ring_write(ring, cmd);
5638 	amdgpu_ring_write(ring, reg);
5639 	amdgpu_ring_write(ring, 0);
5640 	amdgpu_ring_write(ring, val);
5641 }
5642 
5643 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5644 					uint32_t val, uint32_t mask)
5645 {
5646 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5647 }
5648 
5649 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5650 						  uint32_t reg0, uint32_t reg1,
5651 						  uint32_t ref, uint32_t mask)
5652 {
5653 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5654 	struct amdgpu_device *adev = ring->adev;
5655 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5656 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5657 
5658 	if (fw_version_ok)
5659 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5660 				      ref, mask, 0x20);
5661 	else
5662 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5663 							   ref, mask);
5664 }
5665 
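/* Soft recovery: issue an SQ_CMD scoped to @vmid to kill the hung waves. */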
5666 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5667 {
5668 	struct amdgpu_device *adev = ring->adev;
5669 	uint32_t value = 0;
5670 
5671 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5672 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5673 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5674 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5675 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5676 }
5677 
5678 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5679 						 enum amdgpu_interrupt_state state)
5680 {
5681 	switch (state) {
5682 	case AMDGPU_IRQ_STATE_DISABLE:
5683 	case AMDGPU_IRQ_STATE_ENABLE:
5684 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5685 			       TIME_STAMP_INT_ENABLE,
5686 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5687 		break;
5688 	default:
5689 		break;
5690 	}
5691 }
5692 
5693 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5694 						     int me, int pipe,
5695 						     enum amdgpu_interrupt_state state)
5696 {
5697 	u32 mec_int_cntl, mec_int_cntl_reg;
5698 
5699 	/*
5700 	 * amdgpu controls only the first MEC. That's why this function only
5701 	 * handles the setting of interrupts for this specific MEC. All other
5702 	 * pipes' interrupts are set by amdkfd.
5703 	 */
5704 
5705 	if (me == 1) {
5706 		switch (pipe) {
5707 		case 0:
5708 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5709 			break;
5710 		case 1:
5711 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5712 			break;
5713 		case 2:
5714 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5715 			break;
5716 		case 3:
5717 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5718 			break;
5719 		default:
5720 			DRM_DEBUG("invalid pipe %d\n", pipe);
5721 			return;
5722 		}
5723 	} else {
5724 		DRM_DEBUG("invalid me %d\n", me);
5725 		return;
5726 	}
5727 
5728 	switch (state) {
5729 	case AMDGPU_IRQ_STATE_DISABLE:
5730 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5731 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5732 					     TIME_STAMP_INT_ENABLE, 0);
5733 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5734 		break;
5735 	case AMDGPU_IRQ_STATE_ENABLE:
5736 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5737 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5738 					     TIME_STAMP_INT_ENABLE, 1);
5739 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5740 		break;
5741 	default:
5742 		break;
5743 	}
5744 }
5745 
5746 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5747 					     struct amdgpu_irq_src *source,
5748 					     unsigned type,
5749 					     enum amdgpu_interrupt_state state)
5750 {
5751 	switch (state) {
5752 	case AMDGPU_IRQ_STATE_DISABLE:
5753 	case AMDGPU_IRQ_STATE_ENABLE:
5754 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5755 			       PRIV_REG_INT_ENABLE,
5756 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5757 		break;
5758 	default:
5759 		break;
5760 	}
5761 
5762 	return 0;
5763 }
5764 
5765 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5766 					      struct amdgpu_irq_src *source,
5767 					      unsigned type,
5768 					      enum amdgpu_interrupt_state state)
5769 {
5770 	switch (state) {
5771 	case AMDGPU_IRQ_STATE_DISABLE:
5772 	case AMDGPU_IRQ_STATE_ENABLE:
5773 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5774 			       PRIV_INSTR_INT_ENABLE,
5775 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5776 		break;
5777 	default:
5778 		break;
5779 	}
5780 
5781 	return 0;
5782 }
5783 
5784 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5785 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5786 			CP_ECC_ERROR_INT_ENABLE, 1)
5787 
5788 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5789 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5790 			CP_ECC_ERROR_INT_ENABLE, 0)
5791 
5792 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5793 					      struct amdgpu_irq_src *source,
5794 					      unsigned type,
5795 					      enum amdgpu_interrupt_state state)
5796 {
5797 	switch (state) {
5798 	case AMDGPU_IRQ_STATE_DISABLE:
5799 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5800 				CP_ECC_ERROR_INT_ENABLE, 0);
5801 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5802 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5803 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5804 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5805 		break;
5806 
5807 	case AMDGPU_IRQ_STATE_ENABLE:
5808 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5809 				CP_ECC_ERROR_INT_ENABLE, 1);
5810 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5811 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5812 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5813 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5814 		break;
5815 	default:
5816 		break;
5817 	}
5818 
5819 	return 0;
5820 }
5821 
5822 
5823 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5824 					    struct amdgpu_irq_src *src,
5825 					    unsigned type,
5826 					    enum amdgpu_interrupt_state state)
5827 {
5828 	switch (type) {
5829 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5830 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5831 		break;
5832 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5833 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5834 		break;
5835 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5836 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5837 		break;
5838 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5839 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5840 		break;
5841 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5842 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5843 		break;
5844 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5845 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5846 		break;
5847 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5848 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5849 		break;
5850 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5851 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5852 		break;
5853 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5854 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5855 		break;
5856 	default:
5857 		break;
5858 	}
5859 	return 0;
5860 }
5861 
5862 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5863 			    struct amdgpu_irq_src *source,
5864 			    struct amdgpu_iv_entry *entry)
5865 {
5866 	int i;
5867 	u8 me_id, pipe_id, queue_id;
5868 	struct amdgpu_ring *ring;
5869 
5870 	DRM_DEBUG("IH: CP EOP\n");
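	/* The IH ring_id encodes the source: bits [3:2] = ME, bits [1:0] = pipe,
	 * bits [6:4] = queue.
	 */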
5871 	me_id = (entry->ring_id & 0x0c) >> 2;
5872 	pipe_id = (entry->ring_id & 0x03) >> 0;
5873 	queue_id = (entry->ring_id & 0x70) >> 4;
5874 
5875 	switch (me_id) {
5876 	case 0:
5877 		if (adev->gfx.num_gfx_rings &&
5878 		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
			/* Fence signals are handled on the software rings */
5880 			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5881 				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5882 		}
5883 		break;
5884 	case 1:
5885 	case 2:
5886 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5887 			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupts are supported for the MEC starting from VI,
			 * but the interrupt can only be enabled/disabled per pipe,
			 * not per queue.
			 */
5891 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5892 				amdgpu_fence_process(ring);
5893 		}
5894 		break;
5895 	}
5896 	return 0;
5897 }
5898 
5899 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5900 			   struct amdgpu_iv_entry *entry)
5901 {
5902 	u8 me_id, pipe_id, queue_id;
5903 	struct amdgpu_ring *ring;
5904 	int i;
5905 
5906 	me_id = (entry->ring_id & 0x0c) >> 2;
5907 	pipe_id = (entry->ring_id & 0x03) >> 0;
5908 	queue_id = (entry->ring_id & 0x70) >> 4;
5909 
5910 	switch (me_id) {
5911 	case 0:
5912 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5913 		break;
5914 	case 1:
5915 	case 2:
5916 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5917 			ring = &adev->gfx.compute_ring[i];
5918 			if (ring->me == me_id && ring->pipe == pipe_id &&
5919 			    ring->queue == queue_id)
5920 				drm_sched_fault(&ring->sched);
5921 		}
5922 		break;
5923 	}
5924 }
5925 
5926 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5927 				 struct amdgpu_irq_src *source,
5928 				 struct amdgpu_iv_entry *entry)
5929 {
5930 	DRM_ERROR("Illegal register access in command stream\n");
5931 	gfx_v9_0_fault(adev, entry);
5932 	return 0;
5933 }
5934 
5935 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5936 				  struct amdgpu_irq_src *source,
5937 				  struct amdgpu_iv_entry *entry)
5938 {
5939 	DRM_ERROR("Illegal instruction in command stream\n");
5940 	gfx_v9_0_fault(adev, entry);
5941 	return 0;
5942 }
5943 
5944 
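/*
 * EDC counter register map: each entry names a GFX sub-block and the SEC and
 * DED bit fields of its counter register.  Entries with a zero DED mask only
 * expose a single-error-detect (SED) count.  The table is walked by
 * gfx_v9_0_ras_error_count() when totalling correctable/uncorrectable errors.
 */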
5945 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5946 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5947 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5948 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5949 	},
5950 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5951 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5952 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5953 	},
5954 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5955 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5956 	  0, 0
5957 	},
5958 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5959 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5960 	  0, 0
5961 	},
5962 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5963 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5964 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5965 	},
5966 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5967 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5968 	  0, 0
5969 	},
5970 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5971 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5972 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5973 	},
5974 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5975 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5976 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5977 	},
5978 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5979 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5980 	  0, 0
5981 	},
5982 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5983 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5984 	  0, 0
5985 	},
5986 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5987 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5988 	  0, 0
5989 	},
5990 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5991 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5992 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5993 	},
5994 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5995 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5996 	  0, 0
5997 	},
5998 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5999 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6000 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6001 	},
6002 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6003 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6004 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6005 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6006 	},
6007 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6008 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6009 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6010 	  0, 0
6011 	},
6012 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6013 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6014 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6015 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6016 	},
6017 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6018 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6019 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6020 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6021 	},
6022 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6023 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6024 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6025 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6026 	},
6027 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6028 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6029 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6030 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6031 	},
6032 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6033 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6034 	  0, 0
6035 	},
6036 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6037 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6038 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6039 	},
6040 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6041 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6042 	  0, 0
6043 	},
6044 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6045 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6046 	  0, 0
6047 	},
6048 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6049 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6050 	  0, 0
6051 	},
6052 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6053 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6054 	  0, 0
6055 	},
6056 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6057 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6058 	  0, 0
6059 	},
6060 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6061 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6062 	  0, 0
6063 	},
6064 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6065 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6066 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6067 	},
6068 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6069 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6070 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6071 	},
6072 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6073 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6074 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6075 	},
6076 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6077 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6078 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6079 	},
6080 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6081 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6082 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6083 	},
6084 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6085 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6086 	  0, 0
6087 	},
6088 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6089 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6090 	  0, 0
6091 	},
6092 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6093 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6094 	  0, 0
6095 	},
6096 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6097 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6098 	  0, 0
6099 	},
6100 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6101 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6102 	  0, 0
6103 	},
6104 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6105 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6106 	  0, 0
6107 	},
6108 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6109 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6110 	  0, 0
6111 	},
6112 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6113 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6114 	  0, 0
6115 	},
6116 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6117 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6118 	  0, 0
6119 	},
6120 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6121 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6122 	  0, 0
6123 	},
6124 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6125 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6126 	  0, 0
6127 	},
6128 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6129 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6130 	  0, 0
6131 	},
6132 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6133 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6134 	  0, 0
6135 	},
6136 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6137 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6138 	  0, 0
6139 	},
6140 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6141 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6142 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6143 	},
6144 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6145 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6146 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6147 	},
6148 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6149 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6150 	  0, 0
6151 	},
6152 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6153 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6154 	  0, 0
6155 	},
6156 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6157 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6158 	  0, 0
6159 	},
6160 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6161 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6162 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6163 	},
6164 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6165 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6166 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6167 	},
6168 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6169 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6170 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6171 	},
6172 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6173 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6174 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6175 	},
6176 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6177 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6178 	  0, 0
6179 	},
6180 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6181 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6182 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6183 	},
6184 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6185 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6186 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6187 	},
6188 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6189 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6190 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6191 	},
6192 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6193 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6194 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6195 	},
6196 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6197 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6198 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6199 	},
6200 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6201 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6202 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6203 	},
6204 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6205 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6206 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6207 	},
6208 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6209 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6210 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6211 	},
6212 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6213 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6214 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6215 	},
6216 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6217 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6218 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6219 	},
6220 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6221 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6222 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6223 	},
6224 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6225 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6226 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6227 	},
6228 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6229 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6230 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6231 	},
6232 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6233 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6234 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6235 	},
6236 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6237 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6238 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6239 	},
6240 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6241 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6242 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6243 	},
6244 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6245 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6246 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6247 	},
6248 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6249 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6250 	  0, 0
6251 	},
6252 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6253 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6254 	  0, 0
6255 	},
6256 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6257 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6258 	  0, 0
6259 	},
6260 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6261 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6262 	  0, 0
6263 	},
6264 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6265 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6266 	  0, 0
6267 	},
6268 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6270 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6271 	},
6272 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6274 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6275 	},
6276 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6278 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6279 	},
6280 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6281 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6282 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6283 	},
6284 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6285 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6286 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6287 	},
6288 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6289 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6290 	  0, 0
6291 	},
6292 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6293 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6294 	  0, 0
6295 	},
6296 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6297 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6298 	  0, 0
6299 	},
6300 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6301 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6302 	  0, 0
6303 	},
6304 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6305 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6306 	  0, 0
6307 	},
6308 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6309 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6310 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6311 	},
6312 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6313 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6314 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6315 	},
6316 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6317 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6318 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6319 	},
6320 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6321 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6322 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6323 	},
6324 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6325 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6326 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6327 	},
6328 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6329 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6330 	  0, 0
6331 	},
6332 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6333 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6334 	  0, 0
6335 	},
6336 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6337 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6338 	  0, 0
6339 	},
6340 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6341 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6342 	  0, 0
6343 	},
6344 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6345 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6346 	  0, 0
6347 	},
6348 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6350 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6351 	},
6352 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6354 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6355 	},
6356 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6358 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6359 	},
6360 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6361 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6362 	  0, 0
6363 	},
6364 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6365 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6366 	  0, 0
6367 	},
6368 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6369 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6370 	  0, 0
6371 	},
6372 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6373 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6374 	  0, 0
6375 	},
6376 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6377 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6378 	  0, 0
6379 	},
6380 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6381 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6382 	  0, 0
6383 	}
6384 };
6385 
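/*
 * Inject a RAS error through the PSP RAS TA.  The request is validated
 * against the ras_gfx_subblocks table and then translated into a
 * ta_ras_trigger_error_input command; grbm_idx_mutex serializes this
 * against other users of the GRBM index.
 */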
6386 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6387 				     void *inject_if)
6388 {
6389 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6390 	int ret;
6391 	struct ta_ras_trigger_error_input block_info = { 0 };
6392 
6393 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6394 		return -EINVAL;
6395 
6396 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6397 		return -EINVAL;
6398 
6399 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6400 		return -EPERM;
6401 
6402 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6403 	      info->head.type)) {
6404 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6405 			ras_gfx_subblocks[info->head.sub_block_index].name,
6406 			info->head.type);
6407 		return -EPERM;
6408 	}
6409 
6410 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6411 	      info->head.type)) {
6412 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6413 			ras_gfx_subblocks[info->head.sub_block_index].name,
6414 			info->head.type);
6415 		return -EPERM;
6416 	}
6417 
6418 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6419 	block_info.sub_block_index =
6420 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6421 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6422 	block_info.address = info->address;
6423 	block_info.value = info->value;
6424 
6425 	mutex_lock(&adev->grbm_idx_mutex);
6426 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6427 	mutex_unlock(&adev->grbm_idx_mutex);
6428 
6429 	return ret;
6430 }
6431 
6432 static const char *vml2_mems[] = {
6433 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6434 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6435 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6436 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6437 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6438 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6439 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6440 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6441 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6442 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6443 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6444 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6445 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6446 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6447 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6448 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6449 };
6450 
6451 static const char *vml2_walker_mems[] = {
6452 	"UTC_VML2_CACHE_PDE0_MEM0",
6453 	"UTC_VML2_CACHE_PDE0_MEM1",
6454 	"UTC_VML2_CACHE_PDE1_MEM0",
6455 	"UTC_VML2_CACHE_PDE1_MEM1",
6456 	"UTC_VML2_CACHE_PDE2_MEM0",
6457 	"UTC_VML2_CACHE_PDE2_MEM1",
6458 	"UTC_VML2_RDIF_LOG_FIFO",
6459 };
6460 
6461 static const char *atc_l2_cache_2m_mems[] = {
6462 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6463 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6464 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6465 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6466 };
6467 
6468 static const char *atc_l2_cache_4k_mems[] = {
6469 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6470 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6471 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6472 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6473 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6474 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6475 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6476 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6477 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6478 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6479 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6480 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6481 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6482 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6483 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6484 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6485 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6486 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6487 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6488 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6489 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6490 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6491 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6492 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6493 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6494 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6495 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6496 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6497 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6498 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6499 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6500 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6501 };
6502 
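/*
 * Scan the UTC (VML2, VML2 walker and ATC L2) EDC counters and accumulate
 * SEC counts as correctable and DED counts as uncorrectable errors.  The
 * ATC L2 counters have no field macros here, so the SEC/DED counts are
 * extracted with hard-coded masks (bits [14:13] and [16:15]).
 */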
6503 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6504 					 struct ras_err_data *err_data)
6505 {
6506 	uint32_t i, data;
6507 	uint32_t sec_count, ded_count;
6508 
6509 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6510 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6511 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6512 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6513 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6514 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6515 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6516 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6517 
6518 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6519 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6520 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6521 
6522 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6523 		if (sec_count) {
6524 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6525 				"SEC %d\n", i, vml2_mems[i], sec_count);
6526 			err_data->ce_count += sec_count;
6527 		}
6528 
6529 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6530 		if (ded_count) {
6531 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6532 				"DED %d\n", i, vml2_mems[i], ded_count);
6533 			err_data->ue_count += ded_count;
6534 		}
6535 	}
6536 
6537 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6538 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6539 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6540 
6541 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6542 						SEC_COUNT);
6543 		if (sec_count) {
6544 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6545 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6546 			err_data->ce_count += sec_count;
6547 		}
6548 
6549 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6550 						DED_COUNT);
6551 		if (ded_count) {
6552 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6553 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6554 			err_data->ue_count += ded_count;
6555 		}
6556 	}
6557 
6558 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6559 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6560 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6561 
6562 		sec_count = (data & 0x00006000L) >> 0xd;
6563 		if (sec_count) {
6564 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6565 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6566 				sec_count);
6567 			err_data->ce_count += sec_count;
6568 		}
6569 	}
6570 
6571 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6572 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6573 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6574 
6575 		sec_count = (data & 0x00006000L) >> 0xd;
6576 		if (sec_count) {
6577 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6578 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6579 				sec_count);
6580 			err_data->ce_count += sec_count;
6581 		}
6582 
6583 		ded_count = (data & 0x00018000L) >> 0xf;
6584 		if (ded_count) {
6585 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6586 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6587 				ded_count);
6588 			err_data->ue_count += ded_count;
6589 		}
6590 	}
6591 
6592 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6593 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6594 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6595 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6596 
6597 	return 0;
6598 }
6599 
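/*
 * Accumulate the SEC/DED counts contained in one EDC counter register value
 * by matching the register against the gfx_v9_0_ras_fields table above.
 */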
6600 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6601 	const struct soc15_reg_entry *reg,
6602 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6603 	uint32_t *sec_count, uint32_t *ded_count)
6604 {
6605 	uint32_t i;
6606 	uint32_t sec_cnt, ded_cnt;
6607 
6608 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6610 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6611 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6612 			continue;
6613 
6614 		sec_cnt = (value &
6615 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6616 				gfx_v9_0_ras_fields[i].sec_count_shift;
6617 		if (sec_cnt) {
6618 			dev_info(adev->dev, "GFX SubBlock %s, "
6619 				"Instance[%d][%d], SEC %d\n",
6620 				gfx_v9_0_ras_fields[i].name,
6621 				se_id, inst_id,
6622 				sec_cnt);
6623 			*sec_count += sec_cnt;
6624 		}
6625 
6626 		ded_cnt = (value &
6627 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6628 				gfx_v9_0_ras_fields[i].ded_count_shift;
6629 		if (ded_cnt) {
6630 			dev_info(adev->dev, "GFX SubBlock %s, "
6631 				"Instance[%d][%d], DED %d\n",
6632 				gfx_v9_0_ras_fields[i].name,
6633 				se_id, inst_id,
6634 				ded_cnt);
6635 			*ded_count += ded_cnt;
6636 		}
6637 	}
6638 
6639 	return 0;
6640 }
6641 
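/*
 * Clear all GFX EDC counters: the per-SE/SH counters listed in
 * gfx_v9_0_edc_counter_regs are cleared by reading them back, while the UTC
 * counters are cleared with a combination of zero writes and per-index
 * readbacks.
 */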
6642 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6643 {
6644 	int i, j, k;
6645 
6646 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6647 		return;
6648 
6649 	/* read back registers to clear the counters */
6650 	mutex_lock(&adev->grbm_idx_mutex);
6651 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6652 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6653 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6654 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
6655 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6656 			}
6657 		}
6658 	}
6659 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6660 	mutex_unlock(&adev->grbm_idx_mutex);
6661 
6662 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6663 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6664 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6665 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6666 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6667 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6668 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6669 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6670 
6671 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6672 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6673 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6674 	}
6675 
6676 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6677 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6678 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6679 	}
6680 
6681 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6682 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6683 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6684 	}
6685 
6686 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6687 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6688 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6689 	}
6690 
6691 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6692 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6693 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6694 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6695 }
6696 
6697 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6698 					  void *ras_error_status)
6699 {
6700 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6701 	uint32_t sec_count = 0, ded_count = 0;
6702 	uint32_t i, j, k;
6703 	uint32_t reg_value;
6704 
6705 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6706 		return;
6707 
6708 	err_data->ue_count = 0;
6709 	err_data->ce_count = 0;
6710 
6711 	mutex_lock(&adev->grbm_idx_mutex);
6712 
6713 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6714 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6715 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6716 				amdgpu_gfx_select_se_sh(adev, j, 0, k);
6717 				reg_value =
6718 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6719 				if (reg_value)
6720 					gfx_v9_0_ras_error_count(adev,
6721 						&gfx_v9_0_edc_counter_regs[i],
6722 						j, k, reg_value,
6723 						&sec_count, &ded_count);
6724 			}
6725 		}
6726 	}
6727 
6728 	err_data->ce_count += sec_count;
6729 	err_data->ue_count += ded_count;
6730 
6731 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6732 	mutex_unlock(&adev->grbm_idx_mutex);
6733 
6734 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6735 }
6736 
6737 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6738 {
6739 	const unsigned int cp_coher_cntl =
6740 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6741 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6742 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6743 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6744 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6745 
	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6747 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6748 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6749 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6750 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6751 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6752 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6753 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6754 }
6755 
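/*
 * Program SPI_WCL_PIPE_PERCENT_CS<pipe> for one compute pipe: 0x1 throttles
 * the pipe to a minimal wave share, while the register default restores the
 * normal limit.
 */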
6756 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6757 					uint32_t pipe, bool enable)
6758 {
6759 	struct amdgpu_device *adev = ring->adev;
6760 	uint32_t val;
6761 	uint32_t wcl_cs_reg;
6762 
	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6764 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6765 
6766 	switch (pipe) {
6767 	case 0:
6768 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6769 		break;
6770 	case 1:
6771 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6772 		break;
6773 	case 2:
6774 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6775 		break;
6776 	case 3:
6777 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6778 		break;
6779 	default:
6780 		DRM_DEBUG("invalid pipe %d\n", pipe);
6781 		return;
6782 	}
6783 
	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6788 {
6789 	struct amdgpu_device *adev = ring->adev;
6790 	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
	 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx
	 * only gets around 25% of the GPU resources.
	 */
6798 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6799 	amdgpu_ring_emit_wreg(ring,
6800 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6801 			      val);
6802 
	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
	 */
6808 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6809 		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
	}
6813 }
6814 
6815 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6816 	.name = "gfx_v9_0",
6817 	.early_init = gfx_v9_0_early_init,
6818 	.late_init = gfx_v9_0_late_init,
6819 	.sw_init = gfx_v9_0_sw_init,
6820 	.sw_fini = gfx_v9_0_sw_fini,
6821 	.hw_init = gfx_v9_0_hw_init,
6822 	.hw_fini = gfx_v9_0_hw_fini,
6823 	.suspend = gfx_v9_0_suspend,
6824 	.resume = gfx_v9_0_resume,
6825 	.is_idle = gfx_v9_0_is_idle,
6826 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6827 	.soft_reset = gfx_v9_0_soft_reset,
6828 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6829 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6830 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6831 };
6832 
6833 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6834 	.type = AMDGPU_RING_TYPE_GFX,
6835 	.align_mask = 0xff,
6836 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6837 	.support_64bit_ptrs = true,
6838 	.secure_submission_supported = true,
6839 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6840 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6841 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6843 		5 +  /* COND_EXEC */
6844 		7 +  /* PIPELINE_SYNC */
6845 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6846 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6847 		2 + /* VM_FLUSH */
6848 		8 +  /* FENCE for VM_FLUSH */
6849 		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
6853 		5 + /* COND_EXEC */
6854 		7 +	 /*	HDP_flush */
6855 		4 +	 /*	VGT_flush */
6856 		14 + /*	CE_META */
6857 		31 + /*	DE_META */
6858 		3 + /* CNTX_CTRL */
6859 		5 + /* HDP_INVL */
6860 		8 + 8 + /* FENCE x2 */
6861 		2 + /* SWITCH_BUFFER */
6862 		7, /* gfx_v9_0_emit_mem_sync */
6863 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6864 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6865 	.emit_fence = gfx_v9_0_ring_emit_fence,
6866 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6867 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6868 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6869 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6870 	.test_ring = gfx_v9_0_ring_test_ring,
6871 	.insert_nop = amdgpu_ring_insert_nop,
6872 	.pad_ib = amdgpu_ring_generic_pad_ib,
6873 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6874 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6875 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6876 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6877 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
6878 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6879 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6880 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6881 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6882 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6883 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6884 };
6885 
6886 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6887 	.type = AMDGPU_RING_TYPE_GFX,
6888 	.align_mask = 0xff,
6889 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6890 	.support_64bit_ptrs = true,
6891 	.secure_submission_supported = true,
6892 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6893 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6894 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6896 		5 +  /* COND_EXEC */
6897 		7 +  /* PIPELINE_SYNC */
6898 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6899 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6900 		2 + /* VM_FLUSH */
6901 		8 +  /* FENCE for VM_FLUSH */
6902 		20 + /* GDS switch */
6903 		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
6905 		     * prior to this double SWITCH_BUFFER
6906 		     */
6907 		5 + /* COND_EXEC */
6908 		7 +	 /*	HDP_flush */
6909 		4 +	 /*	VGT_flush */
6910 		14 + /*	CE_META */
6911 		31 + /*	DE_META */
6912 		3 + /* CNTX_CTRL */
6913 		5 + /* HDP_INVL */
6914 		8 + 8 + /* FENCE x2 */
6915 		2 + /* SWITCH_BUFFER */
6916 		7, /* gfx_v9_0_emit_mem_sync */
6917 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6918 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6919 	.emit_fence = gfx_v9_0_ring_emit_fence,
6920 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6921 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6922 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6923 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6924 	.test_ring = gfx_v9_0_ring_test_ring,
6925 	.test_ib = gfx_v9_0_ring_test_ib,
6926 	.insert_nop = amdgpu_sw_ring_insert_nop,
6927 	.pad_ib = amdgpu_ring_generic_pad_ib,
6928 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6929 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6930 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6931 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6932 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6933 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6934 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6935 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6936 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6937 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6938 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
6939 	.patch_de = gfx_v9_0_ring_patch_de_meta,
6940 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
6941 };
6942 
6943 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6944 	.type = AMDGPU_RING_TYPE_COMPUTE,
6945 	.align_mask = 0xff,
6946 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6947 	.support_64bit_ptrs = true,
6948 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6949 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6950 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6951 	.emit_frame_size =
6952 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6953 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6954 		5 + /* hdp invalidate */
6955 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6956 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6957 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6958 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6959 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6960 		7 + /* gfx_v9_0_emit_mem_sync */
6961 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6962 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6963 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6964 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6965 	.emit_fence = gfx_v9_0_ring_emit_fence,
6966 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6967 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6968 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6969 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6970 	.test_ring = gfx_v9_0_ring_test_ring,
6971 	.test_ib = gfx_v9_0_ring_test_ib,
6972 	.insert_nop = amdgpu_ring_insert_nop,
6973 	.pad_ib = amdgpu_ring_generic_pad_ib,
6974 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6975 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6976 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6977 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6978 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6979 };
6980 
6981 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6982 	.type = AMDGPU_RING_TYPE_KIQ,
6983 	.align_mask = 0xff,
6984 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6985 	.support_64bit_ptrs = true,
6986 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6987 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6988 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6989 	.emit_frame_size =
6990 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6991 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6992 		5 + /* hdp invalidate */
6993 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6994 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6995 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6996 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6997 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6998 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6999 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7000 	.test_ring = gfx_v9_0_ring_test_ring,
7001 	.insert_nop = amdgpu_ring_insert_nop,
7002 	.pad_ib = amdgpu_ring_generic_pad_ib,
7003 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7004 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7005 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7006 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7007 };
7008 
7009 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7010 {
7011 	int i;
7012 
7013 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7014 
7015 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7016 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7017 
7018 	if (adev->gfx.num_gfx_rings) {
7019 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7020 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7021 	}
7022 
7023 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7024 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7025 }
7026 
7027 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7028 	.set = gfx_v9_0_set_eop_interrupt_state,
7029 	.process = gfx_v9_0_eop_irq,
7030 };
7031 
7032 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7033 	.set = gfx_v9_0_set_priv_reg_fault_state,
7034 	.process = gfx_v9_0_priv_reg_irq,
7035 };
7036 
7037 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7038 	.set = gfx_v9_0_set_priv_inst_fault_state,
7039 	.process = gfx_v9_0_priv_inst_irq,
7040 };
7041 
7042 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7043 	.set = gfx_v9_0_set_cp_ecc_error_state,
7044 	.process = amdgpu_gfx_cp_ecc_error_irq,
7045 };
7046 
7047 
7048 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7049 {
7050 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7051 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7052 
7053 	adev->gfx.priv_reg_irq.num_types = 1;
7054 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7055 
7056 	adev->gfx.priv_inst_irq.num_types = 1;
7057 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7058 
	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7060 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7061 }
7062 
7063 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7064 {
7065 	switch (adev->ip_versions[GC_HWIP][0]) {
7066 	case IP_VERSION(9, 0, 1):
7067 	case IP_VERSION(9, 2, 1):
7068 	case IP_VERSION(9, 4, 0):
7069 	case IP_VERSION(9, 2, 2):
7070 	case IP_VERSION(9, 1, 0):
7071 	case IP_VERSION(9, 4, 1):
7072 	case IP_VERSION(9, 3, 0):
7073 	case IP_VERSION(9, 4, 2):
7074 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7075 		break;
7076 	default:
7077 		break;
7078 	}
7079 }
7080 
7081 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7082 {
	/* init ASIC GDS info */
7084 	switch (adev->ip_versions[GC_HWIP][0]) {
7085 	case IP_VERSION(9, 0, 1):
7086 	case IP_VERSION(9, 2, 1):
7087 	case IP_VERSION(9, 4, 0):
7088 		adev->gds.gds_size = 0x10000;
7089 		break;
7090 	case IP_VERSION(9, 2, 2):
7091 	case IP_VERSION(9, 1, 0):
7092 	case IP_VERSION(9, 4, 1):
7093 		adev->gds.gds_size = 0x1000;
7094 		break;
7095 	case IP_VERSION(9, 4, 2):
		/* Aldebaran removed all of the GDS internal memory;
		 * only GWS opcodes (e.g. barrier, semaphore) are
		 * supported in the kernel. */
7099 		adev->gds.gds_size = 0;
7100 		break;
7101 	default:
7102 		adev->gds.gds_size = 0x10000;
7103 		break;
7104 	}
7105 
7106 	switch (adev->ip_versions[GC_HWIP][0]) {
7107 	case IP_VERSION(9, 0, 1):
7108 	case IP_VERSION(9, 4, 0):
7109 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7110 		break;
7111 	case IP_VERSION(9, 2, 1):
7112 		adev->gds.gds_compute_max_wave_id = 0x27f;
7113 		break;
7114 	case IP_VERSION(9, 2, 2):
7115 	case IP_VERSION(9, 1, 0):
7116 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7117 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7118 		else
7119 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7120 		break;
7121 	case IP_VERSION(9, 4, 1):
7122 		adev->gds.gds_compute_max_wave_id = 0xfff;
7123 		break;
7124 	case IP_VERSION(9, 4, 2):
7125 		/* deprecated for Aldebaran, no usage at all */
7126 		adev->gds.gds_compute_max_wave_id = 0;
7127 		break;
7128 	default:
7129 		/* this really depends on the chip */
7130 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7131 		break;
7132 	}
7133 
7134 	adev->gds.gws_size = 64;
7135 	adev->gds.oa_size = 16;
7136 }
7137 
7138 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7139 						 u32 bitmap)
7140 {
7141 	u32 data;
7142 
7143 	if (!bitmap)
7144 		return;
7145 
7146 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7147 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7148 
7149 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7150 }
7151 
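/*
 * Combine the fuse (CC_) and user (GC_USER_) inactive-CU masks, invert the
 * result and mask it to max_cu_per_sh to get the active CU bitmap for the
 * currently selected SE/SH.
 */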
7152 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7153 {
7154 	u32 data, mask;
7155 
7156 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7157 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7158 
7159 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7160 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7161 
7162 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7163 
7164 	return (~data) & mask;
7165 }
7166 
7167 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7168 				 struct amdgpu_cu_info *cu_info)
7169 {
7170 	int i, j, k, counter, active_cu_number = 0;
7171 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7172 	unsigned disable_masks[4 * 4];
7173 
7174 	if (!adev || !cu_info)
7175 		return -EINVAL;
7176 
7177 	/*
	 * 16 comes from the bitmap array size 4*4, which covers all gfx9 ASICs
7179 	 */
7180 	if (adev->gfx.config.max_shader_engines *
7181 		adev->gfx.config.max_sh_per_se > 16)
7182 		return -EINVAL;
7183 
7184 	amdgpu_gfx_parse_disable_cu(disable_masks,
7185 				    adev->gfx.config.max_shader_engines,
7186 				    adev->gfx.config.max_sh_per_se);
7187 
7188 	mutex_lock(&adev->grbm_idx_mutex);
7189 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7190 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7191 			mask = 1;
7192 			ao_bitmap = 0;
7193 			counter = 0;
7194 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
7195 			gfx_v9_0_set_user_cu_inactive_bitmap(
7196 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7197 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7198 
7199 			/*
7200 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7201 			 * 4x4 size array, and it's usually suitable for Vega
7202 			 * ASICs which has 4*2 SE/SH layout.
7203 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7204 			 * To mostly reduce the impact, we make it compatible
7205 			 * with current bitmap array as below:
7206 			 *    SE4,SH0 --> bitmap[0][1]
7207 			 *    SE5,SH0 --> bitmap[1][1]
7208 			 *    SE6,SH0 --> bitmap[2][1]
7209 			 *    SE7,SH0 --> bitmap[3][1]
7210 			 */
7211 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7212 
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7214 				if (bitmap & mask) {
7215 					if (counter < adev->gfx.config.max_cu_per_sh)
7216 						ao_bitmap |= mask;
					counter++;
7218 				}
7219 				mask <<= 1;
7220 			}
7221 			active_cu_number += counter;
7222 			if (i < 2 && j < 2)
7223 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7224 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7225 		}
7226 	}
7227 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7228 	mutex_unlock(&adev->grbm_idx_mutex);
7229 
7230 	cu_info->number = active_cu_number;
7231 	cu_info->ao_cu_mask = ao_cu_mask;
7232 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7233 
7234 	return 0;
7235 }
7236 
7237 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7238 {
7239 	.type = AMD_IP_BLOCK_TYPE_GFX,
7240 	.major = 9,
7241 	.minor = 0,
7242 	.rev = 0,
7243 	.funcs = &gfx_v9_0_ip_funcs,
7244 };
7245