xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 9190d4a263264eabf715f5fc1827da45e3fdc247)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54 
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58 
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
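/*
 * mmGCEA_PROBE_MAP is defined locally here (it is presumably not part of the
 * generated gc_9_0 register headers) for the Renoir golden settings below.
 */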
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133 
134 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
144 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
146 
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151 
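/*
 * GFX sub-block indices in the format expected by the RAS TA (trusted
 * application).  The *_INDEX_START/_INDEX_END values are range markers that
 * group the entries belonging to one hardware block or one of its sub-ranges.
 */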
152 enum ta_ras_gfx_subblock {
153 	/*CPC*/
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
155 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPC_UCODE,
157 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
158 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
159 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
160 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
161 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
162 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
163 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
164 	/* CPF*/
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
166 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
167 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
168 	TA_RAS_BLOCK__GFX_CPF_TAG,
169 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
170 	/* CPG*/
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
172 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
173 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
174 	TA_RAS_BLOCK__GFX_CPG_TAG,
175 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
176 	/* GDS*/
177 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
178 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
179 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
180 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
181 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
182 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
183 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
184 	/* SPI*/
185 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
186 	/* SQ*/
187 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
189 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
190 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
191 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
192 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
193 	/* SQC (3 ranges)*/
194 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
195 	/* SQC range 0*/
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
197 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
198 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
203 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
204 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
205 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
206 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
207 	/* SQC range 1*/
208 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
210 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
218 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
220 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
221 	/* SQC range 2*/
222 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
224 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
225 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
230 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
231 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
232 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
233 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
234 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
235 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
236 	/* TA*/
237 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
239 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
240 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
241 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
242 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
243 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
244 	/* TCA*/
245 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
246 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
247 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
248 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
249 	/* TCC (5 sub-ranges)*/
250 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
251 	/* TCC range 0*/
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
255 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
257 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
258 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
259 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
260 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
261 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
262 	/* TCC range 1*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
264 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
265 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
267 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
268 	/* TCC range 2*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
270 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
271 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
272 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
273 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
274 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
275 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
277 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
278 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
279 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
280 	/* TCC range 3*/
281 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
282 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
283 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
285 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
286 	/* TCC range 4*/
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
288 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
289 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
290 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
291 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
292 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
293 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
294 	/* TCI*/
295 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
296 	/* TCP*/
297 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
298 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
300 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
301 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
302 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
303 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
304 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
305 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
306 	/* TD*/
307 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
308 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
309 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
310 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
311 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
312 	/* EA (3 sub-ranges)*/
313 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
314 	/* EA range 0*/
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
316 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
317 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
319 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
320 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
321 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
325 	/* EA range 1*/
326 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
327 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
328 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
330 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
331 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
332 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
333 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
334 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
335 	/* EA range 2*/
336 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
337 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
338 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
339 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
340 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
341 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
342 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
343 	/* UTC VM L2 bank*/
344 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
345 	/* UTC VM walker*/
346 	TA_RAS_BLOCK__UTC_VML2_WALKER,
347 	/* UTC ATC L2 2MB cache*/
348 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
349 	/* UTC ATC L2 4KB cache*/
350 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
351 	TA_RAS_BLOCK__GFX_MAX
352 };
353 
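/*
 * Per-sub-block RAS capability entry: maps an AMDGPU_RAS_BLOCK__GFX_*
 * sub-block to its TA index and records which error types the hardware and
 * the software stack support for it.
 */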
354 struct ras_gfx_subblock {
355 	unsigned char *name;
356 	int ta_subblock;
357 	int hw_supported_error_type;
358 	int sw_supported_error_type;
359 };
360 
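/*
 * a..d are 0/1 flags packed into the hardware-supported error-type mask and
 * e..h (in a shuffled bit order) into the software-supported one; both masks
 * use the AMDGPU_RAS_ERROR__* bit positions.
 */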
361 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
362 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
363 		#subblock,                                                     \
364 		TA_RAS_BLOCK__##subblock,                                      \
365 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
366 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
367 	}
368 
369 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
389 			     0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     1),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
430 			     0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
442 			     0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
456 			     1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
458 			     1),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
462 			     0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
475 			     0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
480 			     0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
482 			     0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
517 };
518 
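/*
 * "Golden" register settings: masked register updates applied during early
 * init to bring the GC block to AMD's validated defaults.  Each entry gives
 * the register, the mask of bits to touch and the new value.
 */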
519 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
520 {
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
541 };
542 
543 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
544 {
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
563 };
564 
565 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
566 {
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
578 };
579 
580 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
581 {
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
606 };
607 
608 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
609 {
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
617 };
618 
619 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
620 {
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
656 };
657 
658 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
659 {
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
663 };
664 
665 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
666 {
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
683 };
684 
685 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
686 {
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
700 };
701 
702 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
703 {
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
715 };
716 
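/*
 * Registers that are accessed through the RLC (RLCG) indirect interface
 * when direct MMIO register access is restricted, e.g. under SR-IOV.
 */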
717 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
718 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
719 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
720 };
721 
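/*
 * Offsets of the RLC save/restore list index control registers relative to
 * the first instance, so they can be programmed in a simple loop.
 */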
722 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
723 {
724 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
731 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
732 };
733 
734 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
735 {
736 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
743 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
744 };
745 
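/* Expected (golden) GB_ADDR_CONFIG values for each ASIC. */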
746 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
747 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
748 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
749 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
750 
751 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
752 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
753 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
754 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
755 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
756 				struct amdgpu_cu_info *cu_info);
757 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
758 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
759 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
760 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
761 					  void *ras_error_status);
762 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
763 				     void *inject_if);
764 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
765 
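/*
 * KIQ (Kernel Interface Queue) PM4 helpers: compute queues are managed by
 * submitting SET_RESOURCES/MAP_QUEUES/UNMAP_QUEUES/QUERY_STATUS packets to
 * the KIQ ring instead of programming the queue registers directly.
 */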
766 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
767 				uint64_t queue_mask)
768 {
769 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
770 	amdgpu_ring_write(kiq_ring,
771 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
772 		/* vmid_mask: 0, queue_type: 0 (KIQ) */
773 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
774 	amdgpu_ring_write(kiq_ring,
775 			lower_32_bits(queue_mask));	/* queue mask lo */
776 	amdgpu_ring_write(kiq_ring,
777 			upper_32_bits(queue_mask));	/* queue mask hi */
778 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
779 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
780 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
781 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
782 }
783 
784 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
785 				 struct amdgpu_ring *ring)
786 {
787 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
788 	uint64_t wptr_addr = ring->wptr_gpu_addr;
789 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
790 
791 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
792 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
793 	amdgpu_ring_write(kiq_ring,
794 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
795 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
796 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
797 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
798 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
799 			 /* queue_type: normal compute queue */
800 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
801 			 /* alloc format: all_on_one_pipe */
802 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
803 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
804 			 /* num_queues: must be 1 */
805 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
806 	amdgpu_ring_write(kiq_ring,
807 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
808 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
809 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
810 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
811 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
812 }
813 
814 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
815 				   struct amdgpu_ring *ring,
816 				   enum amdgpu_unmap_queues_action action,
817 				   u64 gpu_addr, u64 seq)
818 {
819 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
820 
821 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
822 	amdgpu_ring_write(kiq_ring, /* action, queue_sel, eng_sel, num_queues */
823 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
824 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
825 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
826 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
827 	amdgpu_ring_write(kiq_ring,
828 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
829 
830 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
831 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
832 		amdgpu_ring_write(kiq_ring, 0);
833 		amdgpu_ring_write(kiq_ring, 0);
834 
835 	} else {
836 		amdgpu_ring_write(kiq_ring, 0);
837 		amdgpu_ring_write(kiq_ring, 0);
838 		amdgpu_ring_write(kiq_ring, 0);
839 	}
840 }
841 
842 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
843 				   struct amdgpu_ring *ring,
844 				   u64 addr,
845 				   u64 seq)
846 {
847 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
848 
849 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
850 	amdgpu_ring_write(kiq_ring,
851 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
852 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
853 			  PACKET3_QUERY_STATUS_COMMAND(2));
854 	/* doorbell offset and engine select */
855 	amdgpu_ring_write(kiq_ring,
856 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
857 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
858 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
859 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
860 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
861 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
862 }
863 
864 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
865 				uint16_t pasid, uint32_t flush_type,
866 				bool all_hub)
867 {
868 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
869 	amdgpu_ring_write(kiq_ring,
870 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
871 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
872 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
873 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
874 }
875 
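/*
 * KIQ packet-emission callbacks for GFX v9.  The *_size fields give each
 * packet's length in dwords and must match the number of dwords the helper
 * above emits, so callers can reserve ring space up front.
 */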
876 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
877 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
878 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
879 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
880 	.kiq_query_status = gfx_v9_0_kiq_query_status,
881 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
882 	.set_resources_size = 8,
883 	.map_queues_size = 7,
884 	.unmap_queues_size = 6,
885 	.query_status_size = 7,
886 	.invalidate_tlbs_size = 2,
887 };
888 
889 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
890 {
891 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
892 }
893 
894 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
895 {
896 	switch (adev->ip_versions[GC_HWIP][0]) {
897 	case IP_VERSION(9, 0, 1):
898 		soc15_program_register_sequence(adev,
899 						golden_settings_gc_9_0,
900 						ARRAY_SIZE(golden_settings_gc_9_0));
901 		soc15_program_register_sequence(adev,
902 						golden_settings_gc_9_0_vg10,
903 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
904 		break;
905 	case IP_VERSION(9, 2, 1):
906 		soc15_program_register_sequence(adev,
907 						golden_settings_gc_9_2_1,
908 						ARRAY_SIZE(golden_settings_gc_9_2_1));
909 		soc15_program_register_sequence(adev,
910 						golden_settings_gc_9_2_1_vg12,
911 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
912 		break;
913 	case IP_VERSION(9, 4, 0):
914 		soc15_program_register_sequence(adev,
915 						golden_settings_gc_9_0,
916 						ARRAY_SIZE(golden_settings_gc_9_0));
917 		soc15_program_register_sequence(adev,
918 						golden_settings_gc_9_0_vg20,
919 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
920 		break;
921 	case IP_VERSION(9, 4, 1):
922 		soc15_program_register_sequence(adev,
923 						golden_settings_gc_9_4_1_arct,
924 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
925 		break;
926 	case IP_VERSION(9, 2, 2):
927 	case IP_VERSION(9, 1, 0):
928 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
929 						ARRAY_SIZE(golden_settings_gc_9_1));
930 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
931 			soc15_program_register_sequence(adev,
932 							golden_settings_gc_9_1_rv2,
933 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
934 		else
935 			soc15_program_register_sequence(adev,
936 							golden_settings_gc_9_1_rv1,
937 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
938 		break;
939 	case IP_VERSION(9, 3, 0):
940 		soc15_program_register_sequence(adev,
941 						golden_settings_gc_9_1_rn,
942 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
943 		return; /* Renoir does not need the common golden settings */
944 	case IP_VERSION(9, 4, 2):
945 		gfx_v9_4_2_init_golden_registers(adev,
946 						 adev->smuio.funcs->get_die_id(adev));
947 		break;
948 	default:
949 		break;
950 	}
951 
952 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
953 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
954 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
955 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
956 }
957 
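/*
 * Emit a WRITE_DATA packet that writes @val to the register at offset @reg.
 * @wc requests a write confirmation before the packet is considered done.
 */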
958 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
959 				       bool wc, uint32_t reg, uint32_t val)
960 {
961 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
962 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
963 				WRITE_DATA_DST_SEL(0) |
964 				(wc ? WR_CONFIRM : 0));
965 	amdgpu_ring_write(ring, reg);
966 	amdgpu_ring_write(ring, 0);
967 	amdgpu_ring_write(ring, val);
968 }
969 
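/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & mask) == ref; @inv is the
 * poll interval.
 */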
970 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
971 				  int mem_space, int opt, uint32_t addr0,
972 				  uint32_t addr1, uint32_t ref, uint32_t mask,
973 				  uint32_t inv)
974 {
975 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
976 	amdgpu_ring_write(ring,
977 				 /* memory (1) or register (0) */
978 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
979 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
980 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
981 				 WAIT_REG_MEM_ENGINE(eng_sel)));
982 
983 	if (mem_space)
984 		BUG_ON(addr0 & 0x3); /* Dword align */
985 	amdgpu_ring_write(ring, addr0);
986 	amdgpu_ring_write(ring, addr1);
987 	amdgpu_ring_write(ring, ref);
988 	amdgpu_ring_write(ring, mask);
989 	amdgpu_ring_write(ring, inv); /* poll interval */
990 }
991 
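/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll until the
 * value lands or the timeout expires.
 */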
992 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
993 {
994 	struct amdgpu_device *adev = ring->adev;
995 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
996 	uint32_t tmp = 0;
997 	unsigned i;
998 	int r;
999 
1000 	WREG32(scratch, 0xCAFEDEAD);
1001 	r = amdgpu_ring_alloc(ring, 3);
1002 	if (r)
1003 		return r;
1004 
1005 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1006 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1007 	amdgpu_ring_write(ring, 0xDEADBEEF);
1008 	amdgpu_ring_commit(ring);
1009 
1010 	for (i = 0; i < adev->usec_timeout; i++) {
1011 		tmp = RREG32(scratch);
1012 		if (tmp == 0xDEADBEEF)
1013 			break;
1014 		udelay(1);
1015 	}
1016 
1017 	if (i >= adev->usec_timeout)
1018 		r = -ETIMEDOUT;
1019 	return r;
1020 }
1021 
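/*
 * IB test: allocate a writeback slot, build a small indirect buffer whose
 * WRITE_DATA packet stores 0xDEADBEEF into it, schedule the IB and wait on
 * its fence, then verify the value was written.
 */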
1022 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1023 {
1024 	struct amdgpu_device *adev = ring->adev;
1025 	struct amdgpu_ib ib;
1026 	struct dma_fence *f = NULL;
1027 
1028 	unsigned index;
1029 	uint64_t gpu_addr;
1030 	uint32_t tmp;
1031 	long r;
1032 
1033 	r = amdgpu_device_wb_get(adev, &index);
1034 	if (r)
1035 		return r;
1036 
1037 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1038 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1039 	memset(&ib, 0, sizeof(ib));
1040 	r = amdgpu_ib_get(adev, NULL, 16,
1041 					AMDGPU_IB_POOL_DIRECT, &ib);
1042 	if (r)
1043 		goto err1;
1044 
1045 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1046 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1047 	ib.ptr[2] = lower_32_bits(gpu_addr);
1048 	ib.ptr[3] = upper_32_bits(gpu_addr);
1049 	ib.ptr[4] = 0xDEADBEEF;
1050 	ib.length_dw = 5;
1051 
1052 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1053 	if (r)
1054 		goto err2;
1055 
1056 	r = dma_fence_wait_timeout(f, false, timeout);
1057 	if (r == 0) {
1058 		r = -ETIMEDOUT;
1059 		goto err2;
1060 	} else if (r < 0) {
1061 		goto err2;
1062 	}
1063 
1064 	tmp = adev->wb.wb[index];
1065 	if (tmp == 0xDEADBEEF)
1066 		r = 0;
1067 	else
1068 		r = -EINVAL;
1069 
1070 err2:
1071 	amdgpu_ib_free(adev, &ib, NULL);
1072 	dma_fence_put(f);
1073 err1:
1074 	amdgpu_device_wb_free(adev, index);
1075 	return r;
1076 }
1077 
1078 
1079 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1080 {
1081 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1082 	amdgpu_ucode_release(&adev->gfx.me_fw);
1083 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1084 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1085 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1086 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1087 
1088 	kfree(adev->gfx.rlc.register_list_format);
1089 }
1090 
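/*
 * Determine whether the CP ME/MEC firmware is new enough to support combined
 * register write-and-wait operations; with older firmware the driver falls
 * back to separate write and wait packets.
 */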
1091 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1092 {
1093 	adev->gfx.me_fw_write_wait = false;
1094 	adev->gfx.mec_fw_write_wait = false;
1095 
1096 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1097 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1098 	    (adev->gfx.mec_feature_version < 46) ||
1099 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1100 	    (adev->gfx.pfp_feature_version < 46)))
1101 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1102 
1103 	switch (adev->ip_versions[GC_HWIP][0]) {
1104 	case IP_VERSION(9, 0, 1):
1105 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1106 		    (adev->gfx.me_feature_version >= 42) &&
1107 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1108 		    (adev->gfx.pfp_feature_version >= 42))
1109 			adev->gfx.me_fw_write_wait = true;
1110 
1111 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1112 		    (adev->gfx.mec_feature_version >= 42))
1113 			adev->gfx.mec_fw_write_wait = true;
1114 		break;
1115 	case IP_VERSION(9, 2, 1):
1116 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1117 		    (adev->gfx.me_feature_version >= 44) &&
1118 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1119 		    (adev->gfx.pfp_feature_version >= 44))
1120 			adev->gfx.me_fw_write_wait = true;
1121 
1122 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1123 		    (adev->gfx.mec_feature_version >= 44))
1124 			adev->gfx.mec_fw_write_wait = true;
1125 		break;
1126 	case IP_VERSION(9, 4, 0):
1127 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1128 		    (adev->gfx.me_feature_version >= 44) &&
1129 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1130 		    (adev->gfx.pfp_feature_version >= 44))
1131 			adev->gfx.me_fw_write_wait = true;
1132 
1133 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1134 		    (adev->gfx.mec_feature_version >= 44))
1135 			adev->gfx.mec_fw_write_wait = true;
1136 		break;
1137 	case IP_VERSION(9, 1, 0):
1138 	case IP_VERSION(9, 2, 2):
1139 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1140 		    (adev->gfx.me_feature_version >= 42) &&
1141 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1142 		    (adev->gfx.pfp_feature_version >= 42))
1143 			adev->gfx.me_fw_write_wait = true;
1144 
1145 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1146 		    (adev->gfx.mec_feature_version >= 42))
1147 			adev->gfx.mec_fw_write_wait = true;
1148 		break;
1149 	default:
1150 		adev->gfx.me_fw_write_wait = true;
1151 		adev->gfx.mec_fw_write_wait = true;
1152 		break;
1153 	}
1154 }
1155 
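/*
 * Boards on which GFXOFF is known to be unstable, matched by PCI
 * vendor/device, subsystem IDs and revision; GFXOFF is disabled for them.
 */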
1156 struct amdgpu_gfxoff_quirk {
1157 	u16 chip_vendor;
1158 	u16 chip_device;
1159 	u16 subsys_vendor;
1160 	u16 subsys_device;
1161 	u8 revision;
1162 };
1163 
1164 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1165 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1166 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1167 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1168 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1169 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1170 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1171 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1172 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1173 	{ 0, 0, 0, 0, 0 },
1174 };
1175 
1176 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1177 {
1178 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1179 
1180 	while (p && p->chip_device != 0) {
1181 		if (pdev->vendor == p->chip_vendor &&
1182 		    pdev->device == p->chip_device &&
1183 		    pdev->subsystem_vendor == p->subsys_vendor &&
1184 		    pdev->subsystem_device == p->subsys_device &&
1185 		    pdev->revision == p->revision) {
1186 			return true;
1187 		}
1188 		++p;
1189 	}
1190 	return false;
1191 }
1192 
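/*
 * "Kicker" Raven parts ship SMU firmware >= 0x41e2b and use the updated
 * raven_kicker_rlc firmware image.
 */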
1193 static bool is_raven_kicker(struct amdgpu_device *adev)
1194 {
1195 	if (adev->pm.fw_version >= 0x41e2b)
1196 		return true;
1197 	else
1198 		return false;
1199 }
1200 
1201 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1202 {
1203 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1204 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1205 	    (adev->gfx.me_feature_version >= 52))
1206 		return true;
1207 	else
1208 		return false;
1209 }
1210 
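/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware is
 * too old to support it reliably; otherwise enable the GFX powergating
 * features that GFXOFF depends on.
 */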
1211 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1212 {
1213 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1214 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1215 
1216 	switch (adev->ip_versions[GC_HWIP][0]) {
1217 	case IP_VERSION(9, 0, 1):
1218 	case IP_VERSION(9, 2, 1):
1219 	case IP_VERSION(9, 4, 0):
1220 		break;
1221 	case IP_VERSION(9, 2, 2):
1222 	case IP_VERSION(9, 1, 0):
1223 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1224 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1225 		    ((!is_raven_kicker(adev) &&
1226 		      adev->gfx.rlc_fw_version < 531) ||
1227 		     (adev->gfx.rlc_feature_version < 1) ||
1228 		     !adev->gfx.rlc.is_rlc_v2_1))
1229 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1230 
1231 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1232 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1233 				AMD_PG_SUPPORT_CP |
1234 				AMD_PG_SUPPORT_RLC_SMU_HS;
1235 		break;
1236 	case IP_VERSION(9, 3, 0):
1237 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1238 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1239 				AMD_PG_SUPPORT_CP |
1240 				AMD_PG_SUPPORT_RLC_SMU_HS;
1241 		break;
1242 	default:
1243 		break;
1244 	}
1245 }
1246 
1247 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1248 					  char *chip_name)
1249 {
1250 	char fw_name[30];
1251 	int err;
1252 
1253 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1254 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1255 	if (err)
1256 		goto out;
1257 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1258 
1259 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1260 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1261 	if (err)
1262 		goto out;
1263 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1264 
1265 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1266 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1267 	if (err)
1268 		goto out;
1269 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1270 
1271 out:
1272 	if (err) {
1273 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1274 		amdgpu_ucode_release(&adev->gfx.me_fw);
1275 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1276 	}
1277 	return err;
1278 }
1279 
1280 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1281 				       char *chip_name)
1282 {
1283 	char fw_name[30];
1284 	int err;
1285 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1286 	uint16_t version_major;
1287 	uint16_t version_minor;
1288 	uint32_t smu_version;
1289 
1290 	/*
1291 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1292 	 * instead of picasso_rlc.bin.
1293 	 * Judgment method:
1294 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1295 	 *          or revision >= 0xD8 && revision <= 0xDF
1296 	 * otherwise it is PCO FP5
1297 	 */
1298 	if (!strcmp(chip_name, "picasso") &&
1299 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1300 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1301 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1302 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1303 		(smu_version >= 0x41e2b))
1304 		/*
1305 		 * SMC is loaded by SBIOS on APU and it is able to get the SMU version directly.
1306 		 */
1307 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1308 	else
1309 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1310 	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1311 	if (err)
1312 		goto out;
1313 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1314 
1315 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1316 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1317 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1318 out:
1319 	if (err)
1320 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1321 
1322 	return err;
1323 }
1324 
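/*
 * GC 9.4.2, 9.4.1 and 9.3.0 ship no separate MEC2 firmware image; for those
 * parts the caller simply mirrors the MEC1 version fields into the MEC2 ones.
 */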
1325 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1326 {
1327 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1328 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1329 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1330 		return false;
1331 
1332 	return true;
1333 }
1334 
1335 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1336 					      char *chip_name)
1337 {
1338 	char fw_name[30];
1339 	int err;
1340 
1341 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1342 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1343 	else
1344 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1345 
1346 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1347 	if (err)
1348 		goto out;
1349 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1350 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1351 
1352 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1353 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1354 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1355 		else
1356 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1357 
1358 		/* ignore failures to load */
1359 		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1360 		if (!err) {
1361 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1362 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1363 		} else {
1364 			err = 0;
1365 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1366 		}
1367 	} else {
1368 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1369 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1370 	}
1371 
1372 	gfx_v9_0_check_if_need_gfxoff(adev);
1373 	gfx_v9_0_check_fw_write_wait(adev);
1374 
1375 out:
1376 	if (err)
1377 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1378 	return err;
1379 }
1380 
1381 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1382 {
1383 	char ucode_prefix[30];
1384 	int r;
1385 
1386 	DRM_DEBUG("\n");
1387 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1388 
1389 	/* No CPG in Arcturus */
1390 	if (adev->gfx.num_gfx_rings) {
1391 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1392 		if (r)
1393 			return r;
1394 	}
1395 
1396 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1397 	if (r)
1398 		return r;
1399 
1400 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1401 	if (r)
1402 		return r;
1403 
1404 	return r;
1405 }
1406 
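/*
 * Size of the clear-state indirect buffer in dwords: the PREAMBLE begin/end
 * pairs, one CONTEXT_CONTROL packet, one SET_CONTEXT_REG packet (header +
 * offset + registers) per extent, and the trailing CLEAR_STATE packet.
 */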
1407 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1408 {
1409 	u32 count = 0;
1410 	const struct cs_section_def *sect = NULL;
1411 	const struct cs_extent_def *ext = NULL;
1412 
1413 	/* begin clear state */
1414 	count += 2;
1415 	/* context control state */
1416 	count += 3;
1417 
1418 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1419 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1420 			if (sect->id == SECT_CONTEXT)
1421 				count += 2 + ext->reg_count;
1422 			else
1423 				return 0;
1424 		}
1425 	}
1426 
1427 	/* end clear state */
1428 	count += 2;
1429 	/* clear state */
1430 	count += 2;
1431 
1432 	return count;
1433 }
1434 
1435 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1436 				    volatile u32 *buffer)
1437 {
1438 	u32 count = 0, i;
1439 	const struct cs_section_def *sect = NULL;
1440 	const struct cs_extent_def *ext = NULL;
1441 
1442 	if (adev->gfx.rlc.cs_data == NULL)
1443 		return;
1444 	if (buffer == NULL)
1445 		return;
1446 
1447 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1448 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1449 
1450 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1451 	buffer[count++] = cpu_to_le32(0x80000000);
1452 	buffer[count++] = cpu_to_le32(0x80000000);
1453 
1454 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1455 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1456 			if (sect->id == SECT_CONTEXT) {
1457 				buffer[count++] =
1458 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1459 				buffer[count++] = cpu_to_le32(ext->reg_index -
1460 						PACKET3_SET_CONTEXT_REG_START);
1461 				for (i = 0; i < ext->reg_count; i++)
1462 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1463 			} else {
1464 				return;
1465 			}
1466 		}
1467 	}
1468 
1469 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1470 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1471 
1472 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1473 	buffer[count++] = cpu_to_le32(0);
1474 }
1475 
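/*
 * Build the per-SE/SH always-on CU bitmaps: the first pg_always_on_cu_num CUs
 * are written to RLC_PG_ALWAYS_ON_CU_MASK and the first always_on_cu_num CUs
 * to RLC_LB_ALWAYS_ACTIVE_CU_MASK (4 on APUs, 8 on GC 9.2.1, 12 otherwise).
 */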
1476 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1477 {
1478 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1479 	uint32_t pg_always_on_cu_num = 2;
1480 	uint32_t always_on_cu_num;
1481 	uint32_t i, j, k;
1482 	uint32_t mask, cu_bitmap, counter;
1483 
1484 	if (adev->flags & AMD_IS_APU)
1485 		always_on_cu_num = 4;
1486 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1487 		always_on_cu_num = 8;
1488 	else
1489 		always_on_cu_num = 12;
1490 
1491 	mutex_lock(&adev->grbm_idx_mutex);
1492 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1493 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1494 			mask = 1;
1495 			cu_bitmap = 0;
1496 			counter = 0;
1497 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
1498 
1499 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1500 				if (cu_info->bitmap[i][j] & mask) {
1501 					if (counter == pg_always_on_cu_num)
1502 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1503 					if (counter < always_on_cu_num)
1504 						cu_bitmap |= mask;
1505 					else
1506 						break;
1507 					counter++;
1508 				}
1509 				mask <<= 1;
1510 			}
1511 
1512 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1513 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1514 		}
1515 	}
1516 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1517 	mutex_unlock(&adev->grbm_idx_mutex);
1518 }
1519 
1520 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1521 {
1522 	uint32_t data;
1523 
1524 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1525 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1526 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1527 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1528 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1529 
1530 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1531 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1532 
1533 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1534 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1535 
1536 	mutex_lock(&adev->grbm_idx_mutex);
1537 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1538 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1539 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1540 
1541 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1542 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1543 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1544 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1545 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1546 
1547 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1548 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1549 	data &= 0x0000FFFF;
1550 	data |= 0x00C00000;
1551 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1552 
1553 	/*
1554 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1555 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1556 	 */
1557 
1558 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1559 	 * but used here for RLC_LB_CNTL configuration */
1560 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1561 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1562 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1563 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1564 	mutex_unlock(&adev->grbm_idx_mutex);
1565 
1566 	gfx_v9_0_init_always_on_cu_mask(adev);
1567 }
1568 
1569 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1570 {
1571 	uint32_t data;
1572 
1573 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1574 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1575 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1576 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1577 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1578 
1579 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1580 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1581 
1582 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1583 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1584 
1585 	mutex_lock(&adev->grbm_idx_mutex);
1586 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1587 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1588 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1589 
1590 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1591 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1592 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1593 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1594 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1595 
1596 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1597 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1598 	data &= 0x0000FFFF;
1599 	data |= 0x00C00000;
1600 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1601 
1602 	/*
1603 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1604 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1605 	 */
1606 
1607 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1608 	 * but used here for RLC_LB_CNTL configuration */
1609 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1610 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1611 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1612 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1613 	mutex_unlock(&adev->grbm_idx_mutex);
1614 
1615 	gfx_v9_0_init_always_on_cu_mask(adev);
1616 }
1617 
1618 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1619 {
1620 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1621 }
1622 
1623 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1624 {
1625 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1626 		return 5;
1627 	else
1628 		return 4;
1629 }
1630 
1631 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1632 {
1633 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1634 
1635 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1636 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1637 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1638 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1639 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1640 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1641 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1642 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1643 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1644 }
1645 
1646 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1647 {
1648 	const struct cs_section_def *cs_data;
1649 	int r;
1650 
1651 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1652 
1653 	cs_data = adev->gfx.rlc.cs_data;
1654 
1655 	if (cs_data) {
1656 		/* init clear state block */
1657 		r = amdgpu_gfx_rlc_init_csb(adev);
1658 		if (r)
1659 			return r;
1660 	}
1661 
1662 	if (adev->flags & AMD_IS_APU) {
1663 		/* TODO: double check the cp_table_size for RV */
1664 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1665 		r = amdgpu_gfx_rlc_init_cpt(adev);
1666 		if (r)
1667 			return r;
1668 	}
1669 
1670 	switch (adev->ip_versions[GC_HWIP][0]) {
1671 	case IP_VERSION(9, 2, 2):
1672 	case IP_VERSION(9, 1, 0):
1673 		gfx_v9_0_init_lbpw(adev);
1674 		break;
1675 	case IP_VERSION(9, 4, 0):
1676 		gfx_v9_4_init_lbpw(adev);
1677 		break;
1678 	default:
1679 		break;
1680 	}
1681 
1682 	/* init spm vmid with 0xf */
1683 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1684 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1685 
1686 	return 0;
1687 }
1688 
1689 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1690 {
1691 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1692 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1693 }
1694 
1695 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1696 {
1697 	int r;
1698 	u32 *hpd;
1699 	const __le32 *fw_data;
1700 	unsigned fw_size;
1701 	u32 *fw;
1702 	size_t mec_hpd_size;
1703 
1704 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1705 
1706 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1707 
1708 	/* take ownership of the relevant compute queues */
1709 	amdgpu_gfx_compute_queue_acquire(adev);
1710 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1711 	if (mec_hpd_size) {
1712 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1713 					      AMDGPU_GEM_DOMAIN_VRAM |
1714 					      AMDGPU_GEM_DOMAIN_GTT,
1715 					      &adev->gfx.mec.hpd_eop_obj,
1716 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1717 					      (void **)&hpd);
1718 		if (r) {
1719 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1720 			gfx_v9_0_mec_fini(adev);
1721 			return r;
1722 		}
1723 
1724 		memset(hpd, 0, mec_hpd_size);
1725 
1726 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1727 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1728 	}
1729 
1730 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1731 
1732 	fw_data = (const __le32 *)
1733 		(adev->gfx.mec_fw->data +
1734 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1735 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1736 
1737 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1738 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1739 				      &adev->gfx.mec.mec_fw_obj,
1740 				      &adev->gfx.mec.mec_fw_gpu_addr,
1741 				      (void **)&fw);
1742 	if (r) {
1743 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1744 		gfx_v9_0_mec_fini(adev);
1745 		return r;
1746 	}
1747 
1748 	memcpy(fw, fw_data, fw_size);
1749 
1750 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1751 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1752 
1753 	return 0;
1754 }
1755 
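/*
 * Wave state is read through the SQ indirect interface: SQ_IND_INDEX selects
 * the wave/SIMD/register (with FORCE_READ, plus AUTO_INCR for burst reads) and
 * the value is then read back from SQ_IND_DATA.
 */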
1756 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1757 {
1758 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1759 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1760 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1761 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1762 		(SQ_IND_INDEX__FORCE_READ_MASK));
1763 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1764 }
1765 
1766 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1767 			   uint32_t wave, uint32_t thread,
1768 			   uint32_t regno, uint32_t num, uint32_t *out)
1769 {
1770 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1771 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1772 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1773 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1774 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1775 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1776 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1777 	while (num--)
1778 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1779 }
1780 
1781 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1782 {
1783 	/* type 1 wave data */
1784 	dst[(*no_fields)++] = 1;
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1790 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1799 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1800 }
1801 
1802 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1803 				     uint32_t wave, uint32_t start,
1804 				     uint32_t size, uint32_t *dst)
1805 {
1806 	wave_read_regs(
1807 		adev, simd, wave, 0,
1808 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1809 }
1810 
1811 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1812 				     uint32_t wave, uint32_t thread,
1813 				     uint32_t start, uint32_t size,
1814 				     uint32_t *dst)
1815 {
1816 	wave_read_regs(
1817 		adev, simd, wave, thread,
1818 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1819 }
1820 
1821 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1822 				  u32 me, u32 pipe, u32 q, u32 vm)
1823 {
1824 	soc15_grbm_select(adev, me, pipe, q, vm);
1825 }
1826 
1827 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1828 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1829 	.select_se_sh = &gfx_v9_0_select_se_sh,
1830 	.read_wave_data = &gfx_v9_0_read_wave_data,
1831 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1832 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1833 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1834 };
1835 
1836 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1837 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1838 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1839 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1840 };
1841 
1842 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1843 	.ras_block = {
1844 		.hw_ops = &gfx_v9_0_ras_ops,
1845 	},
1846 };
1847 
1848 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1849 {
1850 	u32 gb_addr_config;
1851 	int err;
1852 
1853 	switch (adev->ip_versions[GC_HWIP][0]) {
1854 	case IP_VERSION(9, 0, 1):
1855 		adev->gfx.config.max_hw_contexts = 8;
1856 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1857 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1858 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1859 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1860 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1861 		break;
1862 	case IP_VERSION(9, 2, 1):
1863 		adev->gfx.config.max_hw_contexts = 8;
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1868 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1869 		DRM_INFO("fix gfx.config for vega12\n");
1870 		break;
1871 	case IP_VERSION(9, 4, 0):
1872 		adev->gfx.ras = &gfx_v9_0_ras;
1873 		adev->gfx.config.max_hw_contexts = 8;
1874 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1878 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1879 		gb_addr_config &= ~0xf3e777ff;
1880 		gb_addr_config |= 0x22014042;
1881 		/* check vbios table if gpu info is not available */
1882 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1883 		if (err)
1884 			return err;
1885 		break;
1886 	case IP_VERSION(9, 2, 2):
1887 	case IP_VERSION(9, 1, 0):
1888 		adev->gfx.config.max_hw_contexts = 8;
1889 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1894 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1895 		else
1896 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1897 		break;
1898 	case IP_VERSION(9, 4, 1):
1899 		adev->gfx.ras = &gfx_v9_4_ras;
1900 		adev->gfx.config.max_hw_contexts = 8;
1901 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1905 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1906 		gb_addr_config &= ~0xf3e777ff;
1907 		gb_addr_config |= 0x22014042;
1908 		break;
1909 	case IP_VERSION(9, 3, 0):
1910 		adev->gfx.config.max_hw_contexts = 8;
1911 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1912 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1913 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1914 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1915 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1916 		gb_addr_config &= ~0xf3e777ff;
1917 		gb_addr_config |= 0x22010042;
1918 		break;
1919 	case IP_VERSION(9, 4, 2):
1920 		adev->gfx.ras = &gfx_v9_4_2_ras;
1921 		adev->gfx.config.max_hw_contexts = 8;
1922 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1923 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1924 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1925 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1926 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1927 		gb_addr_config &= ~0xf3e777ff;
1928 		gb_addr_config |= 0x22014042;
1929 		/* check vbios table if gpu info is not available */
1930 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1931 		if (err)
1932 			return err;
1933 		break;
1934 	default:
1935 		BUG();
1936 		break;
1937 	}
1938 
1939 	if (adev->gfx.ras) {
1940 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
1941 		if (err) {
1942 			DRM_ERROR("Failed to register gfx ras block!\n");
1943 			return err;
1944 		}
1945 
1946 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
1947 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
1948 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
1949 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
1950 
1951 		/* If no special ras_late_init function is defined, use the gfx default ras_late_init */
1952 		if (!adev->gfx.ras->ras_block.ras_late_init)
1953 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
1954 
1955 		/* If no special ras_cb function is defined, use the default ras_cb */
1956 		if (!adev->gfx.ras->ras_block.ras_cb)
1957 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
1958 	}
1959 
1960 	adev->gfx.config.gb_addr_config = gb_addr_config;
1961 
1962 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1963 			REG_GET_FIELD(
1964 					adev->gfx.config.gb_addr_config,
1965 					GB_ADDR_CONFIG,
1966 					NUM_PIPES);
1967 
1968 	adev->gfx.config.max_tile_pipes =
1969 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1970 
1971 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1972 			REG_GET_FIELD(
1973 					adev->gfx.config.gb_addr_config,
1974 					GB_ADDR_CONFIG,
1975 					NUM_BANKS);
1976 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1977 			REG_GET_FIELD(
1978 					adev->gfx.config.gb_addr_config,
1979 					GB_ADDR_CONFIG,
1980 					MAX_COMPRESSED_FRAGS);
1981 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1982 			REG_GET_FIELD(
1983 					adev->gfx.config.gb_addr_config,
1984 					GB_ADDR_CONFIG,
1985 					NUM_RB_PER_SE);
1986 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1987 			REG_GET_FIELD(
1988 					adev->gfx.config.gb_addr_config,
1989 					GB_ADDR_CONFIG,
1990 					NUM_SHADER_ENGINES);
1991 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1992 			REG_GET_FIELD(
1993 					adev->gfx.config.gb_addr_config,
1994 					GB_ADDR_CONFIG,
1995 					PIPE_INTERLEAVE_SIZE));
1996 
1997 	return 0;
1998 }
1999 
2000 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2001 				      int mec, int pipe, int queue)
2002 {
2003 	unsigned irq_type;
2004 	struct amdgpu_ring *ring;
2005 	unsigned int hw_prio;
2006 
2007 	ring = &adev->gfx.compute_ring[ring_id];
2008 
2009 	/* mec0 is me1 */
2010 	ring->me = mec + 1;
2011 	ring->pipe = pipe;
2012 	ring->queue = queue;
2013 
2014 	ring->ring_obj = NULL;
2015 	ring->use_doorbell = true;
2016 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2017 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2018 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2019 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2020 
2021 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2022 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2023 		+ ring->pipe;
2024 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2025 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2026 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2027 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2028 				hw_prio, NULL);
2029 }
2030 
2031 static int gfx_v9_0_sw_init(void *handle)
2032 {
2033 	int i, j, k, r, ring_id;
2034 	struct amdgpu_ring *ring;
2035 	struct amdgpu_kiq *kiq;
2036 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2037 	unsigned int hw_prio;
2038 
2039 	switch (adev->ip_versions[GC_HWIP][0]) {
2040 	case IP_VERSION(9, 0, 1):
2041 	case IP_VERSION(9, 2, 1):
2042 	case IP_VERSION(9, 4, 0):
2043 	case IP_VERSION(9, 2, 2):
2044 	case IP_VERSION(9, 1, 0):
2045 	case IP_VERSION(9, 4, 1):
2046 	case IP_VERSION(9, 3, 0):
2047 	case IP_VERSION(9, 4, 2):
2048 		adev->gfx.mec.num_mec = 2;
2049 		break;
2050 	default:
2051 		adev->gfx.mec.num_mec = 1;
2052 		break;
2053 	}
2054 
2055 	adev->gfx.mec.num_pipe_per_mec = 4;
2056 	adev->gfx.mec.num_queue_per_pipe = 8;
2057 
2058 	/* EOP Event */
2059 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2060 	if (r)
2061 		return r;
2062 
2063 	/* Privileged reg */
2064 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2065 			      &adev->gfx.priv_reg_irq);
2066 	if (r)
2067 		return r;
2068 
2069 	/* Privileged inst */
2070 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2071 			      &adev->gfx.priv_inst_irq);
2072 	if (r)
2073 		return r;
2074 
2075 	/* ECC error */
2076 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2077 			      &adev->gfx.cp_ecc_error_irq);
2078 	if (r)
2079 		return r;
2080 
2081 	/* FUE error */
2082 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2083 			      &adev->gfx.cp_ecc_error_irq);
2084 	if (r)
2085 		return r;
2086 
2087 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2088 
2089 	if (adev->gfx.rlc.funcs) {
2090 		if (adev->gfx.rlc.funcs->init) {
2091 			r = adev->gfx.rlc.funcs->init(adev);
2092 			if (r) {
2093 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2094 				return r;
2095 			}
2096 		}
2097 	}
2098 
2099 	r = gfx_v9_0_mec_init(adev);
2100 	if (r) {
2101 		DRM_ERROR("Failed to init MEC BOs!\n");
2102 		return r;
2103 	}
2104 
2105 	/* set up the gfx ring */
2106 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2107 		ring = &adev->gfx.gfx_ring[i];
2108 		ring->ring_obj = NULL;
2109 		if (!i)
2110 			sprintf(ring->name, "gfx");
2111 		else
2112 			sprintf(ring->name, "gfx_%d", i);
2113 		ring->use_doorbell = true;
2114 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2115 
2116 		/* disable scheduler on the real ring */
2117 		ring->no_scheduler = true;
2118 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2119 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2120 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2121 		if (r)
2122 			return r;
2123 	}
2124 
2125 	/* set up the software rings */
2126 	if (adev->gfx.num_gfx_rings) {
2127 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2128 			ring = &adev->gfx.sw_gfx_ring[i];
2129 			ring->ring_obj = NULL;
2130 			sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2131 			ring->use_doorbell = true;
2132 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2133 			ring->is_sw_ring = true;
2134 			hw_prio = amdgpu_sw_ring_priority(i);
2135 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2136 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2137 					     NULL);
2138 			if (r)
2139 				return r;
2140 			ring->wptr = 0;
2141 		}
2142 
2143 		/* init the muxer and add software rings */
2144 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2145 					 GFX9_NUM_SW_GFX_RINGS);
2146 		if (r) {
2147 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2148 			return r;
2149 		}
2150 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2151 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2152 							&adev->gfx.sw_gfx_ring[i]);
2153 			if (r) {
2154 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2155 				return r;
2156 			}
2157 		}
2158 	}
2159 
2160 	/* set up the compute queues - allocate horizontally across pipes */
2161 	ring_id = 0;
2162 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2163 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2164 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2165 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2166 					continue;
2167 
2168 				r = gfx_v9_0_compute_ring_init(adev,
2169 							       ring_id,
2170 							       i, k, j);
2171 				if (r)
2172 					return r;
2173 
2174 				ring_id++;
2175 			}
2176 		}
2177 	}
2178 
2179 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2180 	if (r) {
2181 		DRM_ERROR("Failed to init KIQ BOs!\n");
2182 		return r;
2183 	}
2184 
2185 	kiq = &adev->gfx.kiq;
2186 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2187 	if (r)
2188 		return r;
2189 
2190 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2191 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2192 	if (r)
2193 		return r;
2194 
2195 	adev->gfx.ce_ram_size = 0x8000;
2196 
2197 	r = gfx_v9_0_gpu_early_init(adev);
2198 	if (r)
2199 		return r;
2200 
2201 	return 0;
2202 }
2203 
2204 
2205 static int gfx_v9_0_sw_fini(void *handle)
2206 {
2207 	int i;
2208 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2209 
2210 	if (adev->gfx.num_gfx_rings) {
2211 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2212 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2213 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2214 	}
2215 
2216 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2217 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2218 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2219 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2220 
2221 	amdgpu_gfx_mqd_sw_fini(adev);
2222 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2223 	amdgpu_gfx_kiq_fini(adev);
2224 
2225 	gfx_v9_0_mec_fini(adev);
2226 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2227 				&adev->gfx.rlc.clear_state_gpu_addr,
2228 				(void **)&adev->gfx.rlc.cs_ptr);
2229 	if (adev->flags & AMD_IS_APU) {
2230 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2231 				&adev->gfx.rlc.cp_table_gpu_addr,
2232 				(void **)&adev->gfx.rlc.cp_table_ptr);
2233 	}
2234 	gfx_v9_0_free_microcode(adev);
2235 
2236 	return 0;
2237 }
2238 
2239 
2240 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2241 {
2242 	/* TODO */
2243 }
2244 
2245 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2246 			   u32 instance)
2247 {
2248 	u32 data;
2249 
2250 	if (instance == 0xffffffff)
2251 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2252 	else
2253 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2254 
2255 	if (se_num == 0xffffffff)
2256 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2257 	else
2258 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2259 
2260 	if (sh_num == 0xffffffff)
2261 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2262 	else
2263 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2264 
2265 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2266 }
2267 
2268 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2269 {
2270 	u32 data, mask;
2271 
2272 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2273 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2274 
2275 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2276 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2277 
2278 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2279 					 adev->gfx.config.max_sh_per_se);
2280 
2281 	return (~data) & mask;
2282 }
2283 
2284 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2285 {
2286 	int i, j;
2287 	u32 data;
2288 	u32 active_rbs = 0;
2289 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2290 					adev->gfx.config.max_sh_per_se;
2291 
2292 	mutex_lock(&adev->grbm_idx_mutex);
2293 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2294 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2295 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2296 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2297 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2298 					       rb_bitmap_width_per_sh);
2299 		}
2300 	}
2301 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2302 	mutex_unlock(&adev->grbm_idx_mutex);
2303 
2304 	adev->gfx.config.backend_enable_mask = active_rbs;
2305 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2306 }
2307 
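/*
 * 0x6000 is placed in both the SHARED and PRIVATE base fields of SH_MEM_BASES,
 * which yields the 0x6000_0000'00000000 aperture layout described in
 * gfx_v9_0_init_compute_vmid() below.
 */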
2308 #define DEFAULT_SH_MEM_BASES	(0x6000)
2309 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2310 {
2311 	int i;
2312 	uint32_t sh_mem_config;
2313 	uint32_t sh_mem_bases;
2314 
2315 	/*
2316 	 * Configure apertures:
2317 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2318 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2319 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2320 	 */
2321 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2322 
2323 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2324 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2325 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2326 
2327 	mutex_lock(&adev->srbm_mutex);
2328 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2329 		soc15_grbm_select(adev, 0, 0, 0, i);
2330 		/* CP and shaders */
2331 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2332 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2333 	}
2334 	soc15_grbm_select(adev, 0, 0, 0, 0);
2335 	mutex_unlock(&adev->srbm_mutex);
2336 
2337 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2338 	   access. These should be enabled by FW for target VMIDs. */
2339 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2340 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2341 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2342 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2343 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2344 	}
2345 }
2346 
2347 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2348 {
2349 	int vmid;
2350 
2351 	/*
2352 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2353 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2354 	 * the driver can enable them for graphics. VMID0 should maintain
2355 	 * access so that HWS firmware can save/restore entries.
2356 	 */
2357 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2358 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2359 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2360 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2361 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2362 	}
2363 }
2364 
2365 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2366 {
2367 	uint32_t tmp;
2368 
2369 	switch (adev->ip_versions[GC_HWIP][0]) {
2370 	case IP_VERSION(9, 4, 1):
2371 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2372 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2373 					DISABLE_BARRIER_WAITCNT, 1);
2374 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2375 		break;
2376 	default:
2377 		break;
2378 	}
2379 }
2380 
2381 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2382 {
2383 	u32 tmp;
2384 	int i;
2385 
2386 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2387 
2388 	gfx_v9_0_tiling_mode_table_init(adev);
2389 
2390 	if (adev->gfx.num_gfx_rings)
2391 		gfx_v9_0_setup_rb(adev);
2392 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2393 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2394 
2395 	/* XXX SH_MEM regs */
2396 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2397 	mutex_lock(&adev->srbm_mutex);
2398 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2399 		soc15_grbm_select(adev, 0, 0, 0, i);
2400 		/* CP and shaders */
2401 		if (i == 0) {
2402 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2403 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2404 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2405 					    !!adev->gmc.noretry);
2406 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2407 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2408 		} else {
2409 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2410 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2411 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2412 					    !!adev->gmc.noretry);
2413 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2414 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2415 				(adev->gmc.private_aperture_start >> 48));
2416 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2417 				(adev->gmc.shared_aperture_start >> 48));
2418 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2419 		}
2420 	}
2421 	soc15_grbm_select(adev, 0, 0, 0, 0);
2422 
2423 	mutex_unlock(&adev->srbm_mutex);
2424 
2425 	gfx_v9_0_init_compute_vmid(adev);
2426 	gfx_v9_0_init_gds_vmid(adev);
2427 	gfx_v9_0_init_sq_config(adev);
2428 }
2429 
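/*
 * Spin (up to adev->usec_timeout microseconds per SE/SH) until the RLC serdes
 * CU masters, and then the non-CU masters, all report idle.
 */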
2430 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2431 {
2432 	u32 i, j, k;
2433 	u32 mask;
2434 
2435 	mutex_lock(&adev->grbm_idx_mutex);
2436 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2437 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2438 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2439 			for (k = 0; k < adev->usec_timeout; k++) {
2440 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2441 					break;
2442 				udelay(1);
2443 			}
2444 			if (k == adev->usec_timeout) {
2445 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2446 						      0xffffffff, 0xffffffff);
2447 				mutex_unlock(&adev->grbm_idx_mutex);
2448 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2449 					 i, j);
2450 				return;
2451 			}
2452 		}
2453 	}
2454 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2455 	mutex_unlock(&adev->grbm_idx_mutex);
2456 
2457 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2458 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2459 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2460 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2461 	for (k = 0; k < adev->usec_timeout; k++) {
2462 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2463 			break;
2464 		udelay(1);
2465 	}
2466 }
2467 
2468 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2469 					       bool enable)
2470 {
2471 	u32 tmp;
2472 
2473 	/* These interrupts should be enabled to drive DS clock */
2474 
2475 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2476 
2477 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2478 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2479 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2480 	if (adev->gfx.num_gfx_rings)
2481 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2482 
2483 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2484 }
2485 
2486 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2487 {
2488 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2489 	/* csib */
2490 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2491 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2492 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2493 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2494 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2495 			adev->gfx.rlc.clear_state_size);
2496 }
2497 
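/*
 * Walk the RLC register_list_format blob starting at indirect_offset: record
 * where each indirect block begins in indirect_start_offsets[] and collect the
 * unique indirect register offsets referenced before each 0xFFFFFFFF
 * terminator into unique_indirect_regs[].
 */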
2498 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2499 				int indirect_offset,
2500 				int list_size,
2501 				int *unique_indirect_regs,
2502 				int unique_indirect_reg_count,
2503 				int *indirect_start_offsets,
2504 				int *indirect_start_offsets_count,
2505 				int max_start_offsets_count)
2506 {
2507 	int idx;
2508 
2509 	for (; indirect_offset < list_size; indirect_offset++) {
2510 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2511 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2512 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2513 
2514 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2515 			indirect_offset += 2;
2516 
2517 			/* look for the matching index */
2518 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2519 				if (unique_indirect_regs[idx] ==
2520 					register_list_format[indirect_offset] ||
2521 					!unique_indirect_regs[idx])
2522 					break;
2523 			}
2524 
2525 			BUG_ON(idx >= unique_indirect_reg_count);
2526 
2527 			if (!unique_indirect_regs[idx])
2528 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2529 
2530 			indirect_offset++;
2531 		}
2532 	}
2533 }
2534 
2535 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2536 {
2537 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2538 	int unique_indirect_reg_count = 0;
2539 
2540 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2541 	int indirect_start_offsets_count = 0;
2542 
2543 	int list_size = 0;
2544 	int i = 0, j = 0;
2545 	u32 tmp = 0;
2546 
2547 	u32 *register_list_format =
2548 		kmemdup(adev->gfx.rlc.register_list_format,
2549 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2550 	if (!register_list_format)
2551 		return -ENOMEM;
2552 
2553 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2554 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2555 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2556 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2557 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2558 				    unique_indirect_regs,
2559 				    unique_indirect_reg_count,
2560 				    indirect_start_offsets,
2561 				    &indirect_start_offsets_count,
2562 				    ARRAY_SIZE(indirect_start_offsets));
2563 
2564 	/* enable auto inc in case it is disabled */
2565 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2566 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2567 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2568 
2569 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2570 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2571 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2572 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2573 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2574 			adev->gfx.rlc.register_restore[i]);
2575 
2576 	/* load indirect register */
2577 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2578 		adev->gfx.rlc.reg_list_format_start);
2579 
2580 	/* direct register portion */
2581 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2582 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2583 			register_list_format[i]);
2584 
2585 	/* indirect register portion */
2586 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2587 		if (register_list_format[i] == 0xFFFFFFFF) {
2588 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2589 			continue;
2590 		}
2591 
2592 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2593 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594 
2595 		for (j = 0; j < unique_indirect_reg_count; j++) {
2596 			if (register_list_format[i] == unique_indirect_regs[j]) {
2597 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2598 				break;
2599 			}
2600 		}
2601 
2602 		BUG_ON(j >= unique_indirect_reg_count);
2603 
2604 		i++;
2605 	}
2606 
2607 	/* set save/restore list size */
2608 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2609 	list_size = list_size >> 1;
2610 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2611 		adev->gfx.rlc.reg_restore_list_size);
2612 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2613 
2614 	/* write the starting offsets to RLC scratch ram */
2615 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2616 		adev->gfx.rlc.starting_offsets_start);
2617 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2618 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2619 		       indirect_start_offsets[i]);
2620 
2621 	/* load unique indirect regs*/
2622 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2623 		if (unique_indirect_regs[i] != 0) {
2624 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2625 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2626 			       unique_indirect_regs[i] & 0x3FFFF);
2627 
2628 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2629 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2630 			       unique_indirect_regs[i] >> 20);
2631 		}
2632 	}
2633 
2634 	kfree(register_list_format);
2635 	return 0;
2636 }
2637 
2638 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2639 {
2640 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2641 }
2642 
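/*
 * Hand control of CGPG to the GFX IP (or give it back): sets/clears
 * PWR_GFX_RLC_CGPG_EN and, when enabling, reports a GFXOFF status of 2 in
 * PWR_MISC_CNTL_STATUS.
 */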
2643 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2644 					     bool enable)
2645 {
2646 	uint32_t data = 0;
2647 	uint32_t default_data = 0;
2648 
2649 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2650 	if (enable) {
2651 		/* enable GFXIP control over CGPG */
2652 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2653 		if (default_data != data)
2654 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2655 
2656 		/* update status */
2657 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2658 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2659 		if (default_data != data)
2660 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2661 	} else {
2662 		/* restore GFXIP control over CGPG */
2663 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2664 		if (default_data != data)
2665 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2666 	}
2667 }
2668 
2669 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2670 {
2671 	uint32_t data = 0;
2672 
2673 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2674 			      AMD_PG_SUPPORT_GFX_SMG |
2675 			      AMD_PG_SUPPORT_GFX_DMG)) {
2676 		/* init IDLE_POLL_COUNT = 60 */
2677 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2678 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2679 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2680 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2681 
2682 		/* init RLC PG Delay */
2683 		data = 0;
2684 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2685 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2686 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2687 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2688 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2689 
2690 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2691 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2692 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2693 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2694 
2695 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2696 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2697 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2698 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2699 
2700 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2701 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2702 
2703 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2704 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2705 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2706 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2707 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2708 	}
2709 }
2710 
2711 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2712 						bool enable)
2713 {
2714 	uint32_t data = 0;
2715 	uint32_t default_data = 0;
2716 
2717 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2718 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2719 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2720 			     enable ? 1 : 0);
2721 	if (default_data != data)
2722 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2723 }
2724 
2725 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2726 						bool enable)
2727 {
2728 	uint32_t data = 0;
2729 	uint32_t default_data = 0;
2730 
2731 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2732 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2733 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2734 			     enable ? 1 : 0);
2735 	if (default_data != data)
2736 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2737 }
2738 
2739 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2740 					bool enable)
2741 {
2742 	uint32_t data = 0;
2743 	uint32_t default_data = 0;
2744 
2745 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2746 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2747 			     CP_PG_DISABLE,
2748 			     enable ? 0 : 1);
2749 	if (default_data != data)
2750 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2751 }
2752 
2753 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2754 						bool enable)
2755 {
2756 	uint32_t data, default_data;
2757 
2758 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2759 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2760 			     GFX_POWER_GATING_ENABLE,
2761 			     enable ? 1 : 0);
2762 	if (default_data != data)
2763 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2764 }
2765 
2766 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2767 						bool enable)
2768 {
2769 	uint32_t data, default_data;
2770 
2771 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2772 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2773 			     GFX_PIPELINE_PG_ENABLE,
2774 			     enable ? 1 : 0);
2775 	if (default_data != data)
2776 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2777 
2778 	if (!enable)
2779 		/* read any GFX register to wake up GFX */
2780 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2781 }
2782 
2783 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2784 						       bool enable)
2785 {
2786 	uint32_t data, default_data;
2787 
2788 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2789 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2790 			     STATIC_PER_CU_PG_ENABLE,
2791 			     enable ? 1 : 0);
2792 	if (default_data != data)
2793 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2794 }
2795 
2796 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2797 						bool enable)
2798 {
2799 	uint32_t data, default_data;
2800 
2801 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2802 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2803 			     DYN_PER_CU_PG_ENABLE,
2804 			     enable ? 1 : 0);
2805 	if (default_data != data)
2806 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2807 }
2808 
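/*
 * Set up power gating: initialize the clear-state buffer, program the RLC
 * save/restore list where available, and point the RLC jump table at the CP
 * table before enabling the GFX power-gating features.
 */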
2809 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2810 {
2811 	gfx_v9_0_init_csb(adev);
2812 
2813 	/*
2814 	 * The RLC save/restore list is available since RLC v2_1
2815 	 * and is required by the gfxoff feature.
2816 	 */
2817 	if (adev->gfx.rlc.is_rlc_v2_1) {
2818 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2819 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2820 			gfx_v9_1_init_rlc_save_restore_list(adev);
2821 		gfx_v9_0_enable_save_restore_machine(adev);
2822 	}
2823 
2824 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2825 			      AMD_PG_SUPPORT_GFX_SMG |
2826 			      AMD_PG_SUPPORT_GFX_DMG |
2827 			      AMD_PG_SUPPORT_CP |
2828 			      AMD_PG_SUPPORT_GDS |
2829 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2830 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2831 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2832 		gfx_v9_0_init_gfx_power_gating(adev);
2833 	}
2834 }
2835 
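/*
 * Halt the RLC F32 core, mask the GUI idle interrupt and wait for the
 * outstanding RLC serdes transactions to drain.
 */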
2836 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2837 {
2838 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2839 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2840 	gfx_v9_0_wait_for_rlc_serdes(adev);
2841 }
2842 
2843 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2844 {
2845 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2846 	udelay(50);
2847 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2848 	udelay(50);
2849 }
2850 
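/*
 * Enable the RLC F32 core; on dGPUs also unmask the GUI idle interrupt.
 * With AMDGPU_RLC_DEBUG_RETRY builds, detect the debug RLC ucode and adjust
 * its timer interval and page-fault retry gap.
 */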
2851 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2852 {
2853 #ifdef AMDGPU_RLC_DEBUG_RETRY
2854 	u32 rlc_ucode_ver;
2855 #endif
2856 
2857 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2858 	udelay(50);
2859 
2860 	/* on APUs (e.g. carrizo) the CP interrupt is enabled after the CP is initialized */
2861 	if (!(adev->flags & AMD_IS_APU)) {
2862 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2863 		udelay(50);
2864 	}
2865 
2866 #ifdef AMDGPU_RLC_DEBUG_RETRY
2867 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2868 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2869 	if (rlc_ucode_ver == 0x108) {
2870 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2871 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2872 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2873 		 * default is 0x9C4 to create a 100us interval */
2874 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2875 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2876 		 * to disable the page fault retry interrupts, default is
2877 		 * 0x100 (256) */
2878 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2879 	}
2880 #endif
2881 }
2882 
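/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode words through
 * RLC_GPM_UCODE_ADDR/DATA and finish by writing the firmware version.
 */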
2883 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2884 {
2885 	const struct rlc_firmware_header_v2_0 *hdr;
2886 	const __le32 *fw_data;
2887 	unsigned i, fw_size;
2888 
2889 	if (!adev->gfx.rlc_fw)
2890 		return -EINVAL;
2891 
2892 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2893 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2894 
2895 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2896 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2897 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2898 
2899 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2900 			RLCG_UCODE_LOADING_START_ADDRESS);
2901 	for (i = 0; i < fw_size; i++)
2902 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2903 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2904 
2905 	return 0;
2906 }
2907 
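/*
 * Bring the RLC back up: stop it, disable CGCG/CGLS, re-init the
 * power-gating state, load the microcode when PSP loading is not used,
 * apply the per-ASIC LBPW setting and restart the RLC.
 */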
2908 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2909 {
2910 	int r;
2911 
2912 	if (amdgpu_sriov_vf(adev)) {
2913 		gfx_v9_0_init_csb(adev);
2914 		return 0;
2915 	}
2916 
2917 	adev->gfx.rlc.funcs->stop(adev);
2918 
2919 	/* disable CG */
2920 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2921 
2922 	gfx_v9_0_init_pg(adev);
2923 
2924 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2925 		/* legacy rlc firmware loading */
2926 		r = gfx_v9_0_rlc_load_microcode(adev);
2927 		if (r)
2928 			return r;
2929 	}
2930 
2931 	switch (adev->ip_versions[GC_HWIP][0]) {
2932 	case IP_VERSION(9, 2, 2):
2933 	case IP_VERSION(9, 1, 0):
2934 		if (amdgpu_lbpw == 0)
2935 			gfx_v9_0_enable_lbpw(adev, false);
2936 		else
2937 			gfx_v9_0_enable_lbpw(adev, true);
2938 		break;
2939 	case IP_VERSION(9, 4, 0):
2940 		if (amdgpu_lbpw > 0)
2941 			gfx_v9_0_enable_lbpw(adev, true);
2942 		else
2943 			gfx_v9_0_enable_lbpw(adev, false);
2944 		break;
2945 	default:
2946 		break;
2947 	}
2948 
2949 	adev->gfx.rlc.funcs->start(adev);
2950 
2951 	return 0;
2952 }
2953 
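/* Halt or un-halt the CP gfx micro engines (ME, PFP and CE). */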
2954 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2955 {
2956 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2957 
2958 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2959 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2960 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2961 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2962 	udelay(50);
2963 }
2964 
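/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP and stream the
 * PFP, CE and ME ucode through their respective UCODE/RAM data ports.
 */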
2965 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2966 {
2967 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2968 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2969 	const struct gfx_firmware_header_v1_0 *me_hdr;
2970 	const __le32 *fw_data;
2971 	unsigned i, fw_size;
2972 
2973 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2974 		return -EINVAL;
2975 
2976 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2977 		adev->gfx.pfp_fw->data;
2978 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2979 		adev->gfx.ce_fw->data;
2980 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2981 		adev->gfx.me_fw->data;
2982 
2983 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2984 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2985 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2986 
2987 	gfx_v9_0_cp_gfx_enable(adev, false);
2988 
2989 	/* PFP */
2990 	fw_data = (const __le32 *)
2991 		(adev->gfx.pfp_fw->data +
2992 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2993 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2994 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2995 	for (i = 0; i < fw_size; i++)
2996 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2997 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2998 
2999 	/* CE */
3000 	fw_data = (const __le32 *)
3001 		(adev->gfx.ce_fw->data +
3002 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3003 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3004 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3005 	for (i = 0; i < fw_size; i++)
3006 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3007 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3008 
3009 	/* ME */
3010 	fw_data = (const __le32 *)
3011 		(adev->gfx.me_fw->data +
3012 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3013 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3014 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3015 	for (i = 0; i < fw_size; i++)
3016 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3017 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3018 
3019 	return 0;
3020 }
3021 
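/*
 * Initialize the gfx CP and emit the clear-state (CSB) preamble plus the
 * initial context/uconfig setup on the first gfx ring.
 */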
3022 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3023 {
3024 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3025 	const struct cs_section_def *sect = NULL;
3026 	const struct cs_extent_def *ext = NULL;
3027 	int r, i, tmp;
3028 
3029 	/* init the CP */
3030 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3031 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3032 
3033 	gfx_v9_0_cp_gfx_enable(adev, true);
3034 
3035 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3036 	if (r) {
3037 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3038 		return r;
3039 	}
3040 
3041 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3042 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3043 
3044 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3045 	amdgpu_ring_write(ring, 0x80000000);
3046 	amdgpu_ring_write(ring, 0x80000000);
3047 
3048 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3049 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3050 			if (sect->id == SECT_CONTEXT) {
3051 				amdgpu_ring_write(ring,
3052 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3053 					       ext->reg_count));
3054 				amdgpu_ring_write(ring,
3055 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3056 				for (i = 0; i < ext->reg_count; i++)
3057 					amdgpu_ring_write(ring, ext->extent[i]);
3058 			}
3059 		}
3060 	}
3061 
3062 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3063 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3064 
3065 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3066 	amdgpu_ring_write(ring, 0);
3067 
3068 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3069 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3070 	amdgpu_ring_write(ring, 0x8000);
3071 	amdgpu_ring_write(ring, 0x8000);
3072 
3073 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3074 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3075 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3076 	amdgpu_ring_write(ring, tmp);
3077 	amdgpu_ring_write(ring, 0);
3078 
3079 	amdgpu_ring_commit(ring);
3080 
3081 	return 0;
3082 }
3083 
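/*
 * Program the gfx ring buffer (CP_RB0_*) registers: size, rptr/wptr
 * writeback addresses, base address and doorbell range, then kick off the
 * ring with gfx_v9_0_cp_gfx_start().
 */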
3084 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3085 {
3086 	struct amdgpu_ring *ring;
3087 	u32 tmp;
3088 	u32 rb_bufsz;
3089 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3090 
3091 	/* Set the write pointer delay */
3092 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3093 
3094 	/* set the RB to use vmid 0 */
3095 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3096 
3097 	/* Set ring buffer size */
3098 	ring = &adev->gfx.gfx_ring[0];
3099 	rb_bufsz = order_base_2(ring->ring_size / 8);
3100 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3101 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3102 #ifdef __BIG_ENDIAN
3103 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3104 #endif
3105 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3106 
3107 	/* Initialize the ring buffer's write pointers */
3108 	ring->wptr = 0;
3109 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3110 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3111 
3112 	/* set the wb address whether it's enabled or not */
3113 	rptr_addr = ring->rptr_gpu_addr;
3114 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3115 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3116 
3117 	wptr_gpu_addr = ring->wptr_gpu_addr;
3118 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3119 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3120 
3121 	mdelay(1);
3122 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3123 
3124 	rb_addr = ring->gpu_addr >> 8;
3125 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3126 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3127 
3128 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3129 	if (ring->use_doorbell) {
3130 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3131 				    DOORBELL_OFFSET, ring->doorbell_index);
3132 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3133 				    DOORBELL_EN, 1);
3134 	} else {
3135 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3136 	}
3137 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3138 
3139 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3140 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3141 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3142 
3143 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3144 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3145 
3146 
3147 	/* start the ring */
3148 	gfx_v9_0_cp_gfx_start(adev);
3149 	ring->sched.ready = true;
3150 
3151 	return 0;
3152 }
3153 
3154 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3155 {
3156 	if (enable) {
3157 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3158 	} else {
3159 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3160 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3161 		adev->gfx.kiq.ring.sched.ready = false;
3162 	}
3163 	udelay(50);
3164 }
3165 
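/*
 * Legacy (non-PSP) MEC microcode load: point the CPC instruction cache at
 * the MEC firmware BO and program the MEC1 jump table.
 */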
3166 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3167 {
3168 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3169 	const __le32 *fw_data;
3170 	unsigned i;
3171 	u32 tmp;
3172 
3173 	if (!adev->gfx.mec_fw)
3174 		return -EINVAL;
3175 
3176 	gfx_v9_0_cp_compute_enable(adev, false);
3177 
3178 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3179 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3180 
3181 	fw_data = (const __le32 *)
3182 		(adev->gfx.mec_fw->data +
3183 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3184 	tmp = 0;
3185 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3186 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3187 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3188 
3189 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3190 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3191 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3192 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3193 
3194 	/* MEC1 */
3195 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3196 			 mec_hdr->jt_offset);
3197 	for (i = 0; i < mec_hdr->jt_size; i++)
3198 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3199 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3200 
3201 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3202 			adev->gfx.mec_fw_version);
3203 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3204 
3205 	return 0;
3206 }
3207 
3208 /* KIQ functions */
3209 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3210 {
3211 	uint32_t tmp;
3212 	struct amdgpu_device *adev = ring->adev;
3213 
3214 	/* tell RLC which is KIQ queue */
3215 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3216 	tmp &= 0xffffff00;
3217 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3218 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3219 	tmp |= 0x80;
3220 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3221 }
3222 
3223 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3224 {
3225 	struct amdgpu_device *adev = ring->adev;
3226 
3227 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3228 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3229 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3230 			mqd->cp_hqd_queue_priority =
3231 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3232 		}
3233 	}
3234 }
3235 
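/*
 * Fill the v9 MQD for a compute/KIQ ring from the ring state: EOP buffer,
 * doorbell, MQD/HQD base addresses, queue size and priority. Only the KIQ
 * marks the queue active here; compute queues are activated later via the
 * KIQ map_queues packet.
 */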
3236 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3237 {
3238 	struct amdgpu_device *adev = ring->adev;
3239 	struct v9_mqd *mqd = ring->mqd_ptr;
3240 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3241 	uint32_t tmp;
3242 
3243 	mqd->header = 0xC0310800;
3244 	mqd->compute_pipelinestat_enable = 0x00000001;
3245 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3246 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3247 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3248 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3249 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3250 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3251 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3252 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3253 	mqd->compute_misc_reserved = 0x00000003;
3254 
3255 	mqd->dynamic_cu_mask_addr_lo =
3256 		lower_32_bits(ring->mqd_gpu_addr
3257 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3258 	mqd->dynamic_cu_mask_addr_hi =
3259 		upper_32_bits(ring->mqd_gpu_addr
3260 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3261 
3262 	eop_base_addr = ring->eop_gpu_addr >> 8;
3263 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3264 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3265 
3266 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3267 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3268 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3269 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3270 
3271 	mqd->cp_hqd_eop_control = tmp;
3272 
3273 	/* enable doorbell? */
3274 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3275 
3276 	if (ring->use_doorbell) {
3277 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3278 				    DOORBELL_OFFSET, ring->doorbell_index);
3279 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3280 				    DOORBELL_EN, 1);
3281 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3282 				    DOORBELL_SOURCE, 0);
3283 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3284 				    DOORBELL_HIT, 0);
3285 	} else {
3286 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3287 					 DOORBELL_EN, 0);
3288 	}
3289 
3290 	mqd->cp_hqd_pq_doorbell_control = tmp;
3291 
3292 	/* disable the queue if it's active */
3293 	ring->wptr = 0;
3294 	mqd->cp_hqd_dequeue_request = 0;
3295 	mqd->cp_hqd_pq_rptr = 0;
3296 	mqd->cp_hqd_pq_wptr_lo = 0;
3297 	mqd->cp_hqd_pq_wptr_hi = 0;
3298 
3299 	/* set the pointer to the MQD */
3300 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3301 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3302 
3303 	/* set MQD vmid to 0 */
3304 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3305 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3306 	mqd->cp_mqd_control = tmp;
3307 
3308 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3309 	hqd_gpu_addr = ring->gpu_addr >> 8;
3310 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3311 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3312 
3313 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3314 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3315 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3316 			    (order_base_2(ring->ring_size / 4) - 1));
3317 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3318 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3319 #ifdef __BIG_ENDIAN
3320 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3321 #endif
3322 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3323 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3324 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3325 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3326 	mqd->cp_hqd_pq_control = tmp;
3327 
3328 	/* set the wb address whether it's enabled or not */
3329 	wb_gpu_addr = ring->rptr_gpu_addr;
3330 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3331 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3332 		upper_32_bits(wb_gpu_addr) & 0xffff;
3333 
3334 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3335 	wb_gpu_addr = ring->wptr_gpu_addr;
3336 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3337 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3338 
3339 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3340 	ring->wptr = 0;
3341 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3342 
3343 	/* set the vmid for the queue */
3344 	mqd->cp_hqd_vmid = 0;
3345 
3346 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3347 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3348 	mqd->cp_hqd_persistent_state = tmp;
3349 
3350 	/* set MIN_IB_AVAIL_SIZE */
3351 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3352 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3353 	mqd->cp_hqd_ib_control = tmp;
3354 
3355 	/* set static priority for a queue/ring */
3356 	gfx_v9_0_mqd_set_priority(ring, mqd);
3357 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3358 
3359 	/* the map_queues packet doesn't need to activate the queue,
3360 	 * so only the KIQ needs to set this field.
3361 	 */
3362 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3363 		mqd->cp_hqd_active = 1;
3364 
3365 	return 0;
3366 }
3367 
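/*
 * Program the HQD registers directly via MMIO from the MQD contents
 * (used for the KIQ queue).
 */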
3368 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3369 {
3370 	struct amdgpu_device *adev = ring->adev;
3371 	struct v9_mqd *mqd = ring->mqd_ptr;
3372 	int j;
3373 
3374 	/* disable wptr polling */
3375 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3376 
3377 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3378 	       mqd->cp_hqd_eop_base_addr_lo);
3379 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3380 	       mqd->cp_hqd_eop_base_addr_hi);
3381 
3382 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3383 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3384 	       mqd->cp_hqd_eop_control);
3385 
3386 	/* enable doorbell? */
3387 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3388 	       mqd->cp_hqd_pq_doorbell_control);
3389 
3390 	/* disable the queue if it's active */
3391 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3392 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3393 		for (j = 0; j < adev->usec_timeout; j++) {
3394 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3395 				break;
3396 			udelay(1);
3397 		}
3398 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3399 		       mqd->cp_hqd_dequeue_request);
3400 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3401 		       mqd->cp_hqd_pq_rptr);
3402 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3403 		       mqd->cp_hqd_pq_wptr_lo);
3404 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3405 		       mqd->cp_hqd_pq_wptr_hi);
3406 	}
3407 
3408 	/* set the pointer to the MQD */
3409 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3410 	       mqd->cp_mqd_base_addr_lo);
3411 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3412 	       mqd->cp_mqd_base_addr_hi);
3413 
3414 	/* set MQD vmid to 0 */
3415 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3416 	       mqd->cp_mqd_control);
3417 
3418 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3419 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3420 	       mqd->cp_hqd_pq_base_lo);
3421 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3422 	       mqd->cp_hqd_pq_base_hi);
3423 
3424 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3425 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3426 	       mqd->cp_hqd_pq_control);
3427 
3428 	/* set the wb address whether it's enabled or not */
3429 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3430 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3431 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3432 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3433 
3434 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3435 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3436 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3437 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3438 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3439 
3440 	/* enable the doorbell if requested */
3441 	if (ring->use_doorbell) {
3442 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3443 					(adev->doorbell_index.kiq * 2) << 2);
3444 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3445 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3446 		 * around this issue. This change has to align with the firmware
3447 		 * update.
3448 		 */
3449 		if (check_if_enlarge_doorbell_range(adev))
3450 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3451 					(adev->doorbell.size - 4));
3452 		else
3453 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3454 					(adev->doorbell_index.userqueue_end * 2) << 2);
3455 	}
3456 
3457 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3458 	       mqd->cp_hqd_pq_doorbell_control);
3459 
3460 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3462 	       mqd->cp_hqd_pq_wptr_lo);
3463 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3464 	       mqd->cp_hqd_pq_wptr_hi);
3465 
3466 	/* set the vmid for the queue */
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3468 
3469 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3470 	       mqd->cp_hqd_persistent_state);
3471 
3472 	/* activate the queue */
3473 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3474 	       mqd->cp_hqd_active);
3475 
3476 	if (ring->use_doorbell)
3477 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3478 
3479 	return 0;
3480 }
3481 
3482 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3483 {
3484 	struct amdgpu_device *adev = ring->adev;
3485 	int j;
3486 
3487 	/* disable the queue if it's active */
3488 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3489 
3490 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3491 
3492 		for (j = 0; j < adev->usec_timeout; j++) {
3493 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3494 				break;
3495 			udelay(1);
3496 		}
3497 
3498 		if (j == adev->usec_timeout) {
3499 			DRM_DEBUG("KIQ dequeue request failed.\n");
3500 
3501 			/* Manual disable if dequeue request times out */
3502 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3503 		}
3504 
3505 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3506 		      0);
3507 	}
3508 
3509 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3510 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3511 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3512 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3513 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3514 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3515 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3516 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3517 
3518 	return 0;
3519 }
3520 
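/*
 * Initialize the KIQ queue: on a GPU reset with a valid MQD backup, restore
 * the saved MQD and just reprogram the HQD registers; otherwise build a
 * fresh MQD, program the HQD and save a backup copy.
 */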
3521 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3522 {
3523 	struct amdgpu_device *adev = ring->adev;
3524 	struct v9_mqd *mqd = ring->mqd_ptr;
3525 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3526 	struct v9_mqd *tmp_mqd;
3527 
3528 	gfx_v9_0_kiq_setting(ring);
3529 
3530 	/* The GPU could be in a bad state during probe and the driver triggers
3531 	 * the reset after loading the SMU; in that case the MQD has not been
3532 	 * initialized and the driver needs to re-init it.
3533 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3534 	 */
3535 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3536 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3537 		/* for GPU_RESET case , reset MQD to a clean status */
3538 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3539 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3540 
3541 		/* reset ring buffer */
3542 		ring->wptr = 0;
3543 		amdgpu_ring_clear_ring(ring);
3544 
3545 		mutex_lock(&adev->srbm_mutex);
3546 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3547 		gfx_v9_0_kiq_init_register(ring);
3548 		soc15_grbm_select(adev, 0, 0, 0, 0);
3549 		mutex_unlock(&adev->srbm_mutex);
3550 	} else {
3551 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3552 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3553 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3554 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3555 			amdgpu_ring_clear_ring(ring);
3556 		mutex_lock(&adev->srbm_mutex);
3557 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3558 		gfx_v9_0_mqd_init(ring);
3559 		gfx_v9_0_kiq_init_register(ring);
3560 		soc15_grbm_select(adev, 0, 0, 0, 0);
3561 		mutex_unlock(&adev->srbm_mutex);
3562 
3563 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3564 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3565 	}
3566 
3567 	return 0;
3568 }
3569 
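/*
 * Initialize a compute (KCQ) MQD: build it fresh on first init, restore it
 * from the backup on GPU reset, or just clear the ring otherwise. The HQD
 * itself is mapped later through the KIQ.
 */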
3570 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3571 {
3572 	struct amdgpu_device *adev = ring->adev;
3573 	struct v9_mqd *mqd = ring->mqd_ptr;
3574 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3575 	struct v9_mqd *tmp_mqd;
3576 
3577 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3578 	 * mqd->cp_hqd_pq_control has not been initialized before.
3579 	 */
3580 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3581 
3582 	if (!tmp_mqd->cp_hqd_pq_control ||
3583 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3584 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3585 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3586 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3587 		mutex_lock(&adev->srbm_mutex);
3588 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3589 		gfx_v9_0_mqd_init(ring);
3590 		soc15_grbm_select(adev, 0, 0, 0, 0);
3591 		mutex_unlock(&adev->srbm_mutex);
3592 
3593 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3594 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3595 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3596 		/* reset MQD to a clean status */
3597 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3598 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3599 
3600 		/* reset ring buffer */
3601 		ring->wptr = 0;
3602 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3603 		amdgpu_ring_clear_ring(ring);
3604 	} else {
3605 		amdgpu_ring_clear_ring(ring);
3606 	}
3607 
3608 	return 0;
3609 }
3610 
3611 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3612 {
3613 	struct amdgpu_ring *ring;
3614 	int r;
3615 
3616 	ring = &adev->gfx.kiq.ring;
3617 
3618 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3619 	if (unlikely(r != 0))
3620 		return r;
3621 
3622 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3623 	if (unlikely(r != 0))
3624 		return r;
3625 
3626 	gfx_v9_0_kiq_init_queue(ring);
3627 	amdgpu_bo_kunmap(ring->mqd_obj);
3628 	ring->mqd_ptr = NULL;
3629 	amdgpu_bo_unreserve(ring->mqd_obj);
3630 	ring->sched.ready = true;
3631 	return 0;
3632 }
3633 
3634 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3635 {
3636 	struct amdgpu_ring *ring = NULL;
3637 	int r = 0, i;
3638 
3639 	gfx_v9_0_cp_compute_enable(adev, true);
3640 
3641 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3642 		ring = &adev->gfx.compute_ring[i];
3643 
3644 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3645 		if (unlikely(r != 0))
3646 			goto done;
3647 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3648 		if (!r) {
3649 			r = gfx_v9_0_kcq_init_queue(ring);
3650 			amdgpu_bo_kunmap(ring->mqd_obj);
3651 			ring->mqd_ptr = NULL;
3652 		}
3653 		amdgpu_bo_unreserve(ring->mqd_obj);
3654 		if (r)
3655 			goto done;
3656 	}
3657 
3658 	r = amdgpu_gfx_enable_kcq(adev);
3659 done:
3660 	return r;
3661 }
3662 
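/*
 * Bring up the CP: optionally load the gfx/compute microcode (non-PSP),
 * resume the KIQ, the gfx ring and the KCQs, then ring-test everything and
 * re-enable the GUI idle interrupt.
 */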
3663 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3664 {
3665 	int r, i;
3666 	struct amdgpu_ring *ring;
3667 
3668 	if (!(adev->flags & AMD_IS_APU))
3669 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3670 
3671 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3672 		if (adev->gfx.num_gfx_rings) {
3673 			/* legacy firmware loading */
3674 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3675 			if (r)
3676 				return r;
3677 		}
3678 
3679 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3680 		if (r)
3681 			return r;
3682 	}
3683 
3684 	r = gfx_v9_0_kiq_resume(adev);
3685 	if (r)
3686 		return r;
3687 
3688 	if (adev->gfx.num_gfx_rings) {
3689 		r = gfx_v9_0_cp_gfx_resume(adev);
3690 		if (r)
3691 			return r;
3692 	}
3693 
3694 	r = gfx_v9_0_kcq_resume(adev);
3695 	if (r)
3696 		return r;
3697 
3698 	if (adev->gfx.num_gfx_rings) {
3699 		ring = &adev->gfx.gfx_ring[0];
3700 		r = amdgpu_ring_test_helper(ring);
3701 		if (r)
3702 			return r;
3703 	}
3704 
3705 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3706 		ring = &adev->gfx.compute_ring[i];
3707 		amdgpu_ring_test_helper(ring);
3708 	}
3709 
3710 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3711 
3712 	return 0;
3713 }
3714 
3715 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3716 {
3717 	u32 tmp;
3718 
3719 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3720 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3721 		return;
3722 
3723 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3724 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3725 				adev->df.hash_status.hash_64k);
3726 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3727 				adev->df.hash_status.hash_2m);
3728 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3729 				adev->df.hash_status.hash_1g);
3730 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3731 }
3732 
3733 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3734 {
3735 	if (adev->gfx.num_gfx_rings)
3736 		gfx_v9_0_cp_gfx_enable(adev, enable);
3737 	gfx_v9_0_cp_compute_enable(adev, enable);
3738 }
3739 
3740 static int gfx_v9_0_hw_init(void *handle)
3741 {
3742 	int r;
3743 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3744 
3745 	if (!amdgpu_sriov_vf(adev))
3746 		gfx_v9_0_init_golden_registers(adev);
3747 
3748 	gfx_v9_0_constants_init(adev);
3749 
3750 	gfx_v9_0_init_tcp_config(adev);
3751 
3752 	r = adev->gfx.rlc.funcs->resume(adev);
3753 	if (r)
3754 		return r;
3755 
3756 	r = gfx_v9_0_cp_resume(adev);
3757 	if (r)
3758 		return r;
3759 
3760 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3761 		gfx_v9_4_2_set_power_brake_sequence(adev);
3762 
3763 	return r;
3764 }
3765 
3766 static int gfx_v9_0_hw_fini(void *handle)
3767 {
3768 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3769 
3770 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3771 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3772 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3773 
3774 	/* DF freeze and kcq disable will fail */
3775 	if (!amdgpu_ras_intr_triggered())
3776 		/* disable KCQ to avoid CPC touching memory that is no longer valid */
3777 		amdgpu_gfx_disable_kcq(adev);
3778 
3779 	if (amdgpu_sriov_vf(adev)) {
3780 		gfx_v9_0_cp_gfx_enable(adev, false);
3781 		/* polling must be disabled for SRIOV when the hw is finished;
3782 		 * otherwise the CPC engine may keep fetching a WB address that is
3783 		 * already invalid after the sw is finished and trigger a DMAR read
3784 		 * error on the hypervisor side.
3785 		 */
3786 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3787 		return 0;
3788 	}
3789 
3790 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3791 	 * otherwise KIQ hangs when binding back
3792 	 */
3793 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3794 		mutex_lock(&adev->srbm_mutex);
3795 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3796 				adev->gfx.kiq.ring.pipe,
3797 				adev->gfx.kiq.ring.queue, 0);
3798 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3799 		soc15_grbm_select(adev, 0, 0, 0, 0);
3800 		mutex_unlock(&adev->srbm_mutex);
3801 	}
3802 
3803 	gfx_v9_0_cp_enable(adev, false);
3804 
3805 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3806 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3807 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3808 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3809 		return 0;
3810 	}
3811 
3812 	adev->gfx.rlc.funcs->stop(adev);
3813 	return 0;
3814 }
3815 
3816 static int gfx_v9_0_suspend(void *handle)
3817 {
3818 	return gfx_v9_0_hw_fini(handle);
3819 }
3820 
3821 static int gfx_v9_0_resume(void *handle)
3822 {
3823 	return gfx_v9_0_hw_init(handle);
3824 }
3825 
3826 static bool gfx_v9_0_is_idle(void *handle)
3827 {
3828 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3829 
3830 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3831 				GRBM_STATUS, GUI_ACTIVE))
3832 		return false;
3833 	else
3834 		return true;
3835 }
3836 
3837 static int gfx_v9_0_wait_for_idle(void *handle)
3838 {
3839 	unsigned i;
3840 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3841 
3842 	for (i = 0; i < adev->usec_timeout; i++) {
3843 		if (gfx_v9_0_is_idle(handle))
3844 			return 0;
3845 		udelay(1);
3846 	}
3847 	return -ETIMEDOUT;
3848 }
3849 
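/*
 * Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if needed, stop the
 * RLC and CP and pulse the corresponding GRBM soft-reset bits.
 */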
3850 static int gfx_v9_0_soft_reset(void *handle)
3851 {
3852 	u32 grbm_soft_reset = 0;
3853 	u32 tmp;
3854 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3855 
3856 	/* GRBM_STATUS */
3857 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3858 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3859 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3860 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3861 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3862 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3863 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3864 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3865 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3866 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3867 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3868 	}
3869 
3870 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3871 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3872 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3873 	}
3874 
3875 	/* GRBM_STATUS2 */
3876 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3877 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3878 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3879 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3880 
3881 
3882 	if (grbm_soft_reset) {
3883 		/* stop the rlc */
3884 		adev->gfx.rlc.funcs->stop(adev);
3885 
3886 		if (adev->gfx.num_gfx_rings)
3887 			/* Disable GFX parsing/prefetching */
3888 			gfx_v9_0_cp_gfx_enable(adev, false);
3889 
3890 		/* Disable MEC parsing/prefetching */
3891 		gfx_v9_0_cp_compute_enable(adev, false);
3892 
3893 		if (grbm_soft_reset) {
3894 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3895 			tmp |= grbm_soft_reset;
3896 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3897 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3898 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3899 
3900 			udelay(50);
3901 
3902 			tmp &= ~grbm_soft_reset;
3903 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3904 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3905 		}
3906 
3907 		/* Wait a little for things to settle down */
3908 		udelay(50);
3909 	}
3910 	return 0;
3911 }
3912 
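/*
 * Read a 64-bit GPU clock value through the KIQ: emit a COPY_DATA packet
 * into a writeback slot, fence-poll for completion (with retries), and
 * return an error value if the KIQ doesn't respond.
 */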
3913 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3914 {
3915 	signed long r, cnt = 0;
3916 	unsigned long flags;
3917 	uint32_t seq, reg_val_offs = 0;
3918 	uint64_t value = 0;
3919 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3920 	struct amdgpu_ring *ring = &kiq->ring;
3921 
3922 	BUG_ON(!ring->funcs->emit_rreg);
3923 
3924 	spin_lock_irqsave(&kiq->ring_lock, flags);
3925 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3926 		pr_err("critical bug! too many kiq readers\n");
3927 		goto failed_unlock;
3928 	}
3929 	amdgpu_ring_alloc(ring, 32);
3930 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3931 	amdgpu_ring_write(ring, 9 |	/* src: register */
3932 				(5 << 8) |	/* dst: memory */
3933 				(1 << 16) |	/* count sel */
3934 				(1 << 20));	/* write confirm */
3935 	amdgpu_ring_write(ring, 0);
3936 	amdgpu_ring_write(ring, 0);
3937 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3938 				reg_val_offs * 4));
3939 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3940 				reg_val_offs * 4));
3941 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3942 	if (r)
3943 		goto failed_undo;
3944 
3945 	amdgpu_ring_commit(ring);
3946 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3947 
3948 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3949 
3950 	/* Don't wait any longer in the GPU reset case, because that may block
3951 	 * the gpu_recover() routine forever; e.g. when this virt_kiq_rreg is
3952 	 * triggered from TTM, ttm_bo_lock_delayed_workqueue() will never return
3953 	 * if we keep waiting in virt_kiq_rreg, which makes gpu_recover() hang
3954 	 * there.
3955 	 *
3956 	 * Also don't wait any longer in IRQ context.
3957 	 */
3958 	if (r < 1 && (amdgpu_in_reset(adev)))
3959 		goto failed_kiq_read;
3960 
3961 	might_sleep();
3962 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3963 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3964 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3965 	}
3966 
3967 	if (cnt > MAX_KIQ_REG_TRY)
3968 		goto failed_kiq_read;
3969 
3970 	mb();
3971 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
3972 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
3973 	amdgpu_device_wb_free(adev, reg_val_offs);
3974 	return value;
3975 
3976 failed_undo:
3977 	amdgpu_ring_undo(ring);
3978 failed_unlock:
3979 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3980 failed_kiq_read:
3981 	if (reg_val_offs)
3982 		amdgpu_device_wb_free(adev, reg_val_offs);
3983 	pr_err("failed to read gpu clock\n");
3984 	return ~0;
3985 }
3986 
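/*
 * Sample the 64-bit GPU clock counter. Renoir (GC 9.3.0) reads the SMUIO
 * golden TSC with a high-word consistency check; other ASICs latch the RLC
 * clock counter, or go through the KIQ for Vega10 SRIOV runtime.
 */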
3987 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3988 {
3989 	uint64_t clock, clock_lo, clock_hi, hi_check;
3990 
3991 	switch (adev->ip_versions[GC_HWIP][0]) {
3992 	case IP_VERSION(9, 3, 0):
3993 		preempt_disable();
3994 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
3995 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
3996 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
3997 		/* The SMUIO TSC clock frequency is 100 MHz, so the 32-bit counter
3998 		 * carries over roughly every 42 seconds.
3999 		 */
4000 		if (hi_check != clock_hi) {
4001 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4002 			clock_hi = hi_check;
4003 		}
4004 		preempt_enable();
4005 		clock = clock_lo | (clock_hi << 32ULL);
4006 		break;
4007 	default:
4008 		amdgpu_gfx_off_ctrl(adev, false);
4009 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4010 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4011 			clock = gfx_v9_0_kiq_read_clock(adev);
4012 		} else {
4013 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4014 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4015 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4016 		}
4017 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4018 		amdgpu_gfx_off_ctrl(adev, true);
4019 		break;
4020 	}
4021 	return clock;
4022 }
4023 
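/* Emit the per-VMID GDS, GWS and OA partition registers for a GDS switch. */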
4024 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4025 					  uint32_t vmid,
4026 					  uint32_t gds_base, uint32_t gds_size,
4027 					  uint32_t gws_base, uint32_t gws_size,
4028 					  uint32_t oa_base, uint32_t oa_size)
4029 {
4030 	struct amdgpu_device *adev = ring->adev;
4031 
4032 	/* GDS Base */
4033 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4034 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4035 				   gds_base);
4036 
4037 	/* GDS Size */
4038 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4039 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4040 				   gds_size);
4041 
4042 	/* GWS */
4043 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4044 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4045 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4046 
4047 	/* OA */
4048 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4049 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4050 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4051 }
4052 
4053 static const u32 vgpr_init_compute_shader[] =
4054 {
4055 	0xb07c0000, 0xbe8000ff,
4056 	0x000000f8, 0xbf110800,
4057 	0x7e000280, 0x7e020280,
4058 	0x7e040280, 0x7e060280,
4059 	0x7e080280, 0x7e0a0280,
4060 	0x7e0c0280, 0x7e0e0280,
4061 	0x80808800, 0xbe803200,
4062 	0xbf84fff5, 0xbf9c0000,
4063 	0xd28c0001, 0x0001007f,
4064 	0xd28d0001, 0x0002027e,
4065 	0x10020288, 0xb8810904,
4066 	0xb7814000, 0xd1196a01,
4067 	0x00000301, 0xbe800087,
4068 	0xbefc00c1, 0xd89c4000,
4069 	0x00020201, 0xd89cc080,
4070 	0x00040401, 0x320202ff,
4071 	0x00000800, 0x80808100,
4072 	0xbf84fff8, 0x7e020280,
4073 	0xbf810000, 0x00000000,
4074 };
4075 
4076 static const u32 sgpr_init_compute_shader[] =
4077 {
4078 	0xb07c0000, 0xbe8000ff,
4079 	0x0000005f, 0xbee50080,
4080 	0xbe812c65, 0xbe822c65,
4081 	0xbe832c65, 0xbe842c65,
4082 	0xbe852c65, 0xb77c0005,
4083 	0x80808500, 0xbf84fff8,
4084 	0xbe800080, 0xbf810000,
4085 };
4086 
4087 static const u32 vgpr_init_compute_shader_arcturus[] = {
4088 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4089 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4090 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4091 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4092 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4093 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4094 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4095 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4096 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4097 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4098 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4099 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4100 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4101 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4102 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4103 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4104 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4105 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4106 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4107 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4108 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4109 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4110 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4111 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4112 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4113 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4114 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4115 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4116 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4117 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4118 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4119 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4120 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4121 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4122 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4123 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4124 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4125 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4126 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4127 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4128 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4129 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4130 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4131 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4132 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4133 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4134 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4135 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4136 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4137 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4138 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4139 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4140 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4141 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4142 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4143 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4144 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4145 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4146 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4147 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4148 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4149 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4150 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4151 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4152 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4153 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4154 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4155 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4156 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4157 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4158 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4159 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4160 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4161 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4162 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4163 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4164 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4165 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4166 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4167 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4168 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4169 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4170 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4171 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4172 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4173 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4174 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4175 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4176 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4177 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4178 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4179 	0xbf84fff8, 0xbf810000,
4180 };
4181 
4182 /* When the register arrays below are changed, please update gpr_reg_size
4183    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4184    to cover all gfx9 ASICs */
4185 static const struct soc15_reg_entry vgpr_init_regs[] = {
4186    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4187    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4188    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4189    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4190    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4191    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4192    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4193    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4194    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4195    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4196    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4197    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4198    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4199    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4200 };
4201 
4202 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4203    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4204    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4205    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4206    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4207    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4208    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4209    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4210    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4211    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4212    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4213    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4214    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4215    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4216    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4217 };
4218 
4219 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4220    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4221    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4222    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4223    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4224    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4225    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4226    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4227    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4228    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4229    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4230    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4231    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4232    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4233    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4234 };
4235 
4236 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4237    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4238    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4239    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4240    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4241    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4242    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4243    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4244    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4245    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4246    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4247    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4248    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4249    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4250    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4251 };
4252 
4253 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4254    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4255    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4256    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4257    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4258    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4259    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4260    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4261    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4262    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4263    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4264    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4265    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4266    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4267    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4268    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4269    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4270    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4271    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4272    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4273    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4274    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4275    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4276    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4277    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4278    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4279    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4280    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4281    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4282    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4283    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4284    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4285    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4286    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4287 };
4288 
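/* EDC workaround: when GFX RAS is enabled, clear the whole VMID0 GDS range
 * with a CPDMA write and wait for the CP to consume the packets.
 */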
4289 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4290 {
4291 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4292 	int i, r;
4293 
4294 	/* only support when RAS is enabled */
4295 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4296 		return 0;
4297 
4298 	r = amdgpu_ring_alloc(ring, 7);
4299 	if (r) {
4300 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4301 			ring->name, r);
4302 		return r;
4303 	}
4304 
4305 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4306 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4307 
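	/* CPDMA DMA_DATA packet: source is immediate data (zero), destination
	 * is GDS, and the length covers the whole GDS size programmed above.
	 */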
4308 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4309 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4310 				PACKET3_DMA_DATA_DST_SEL(1) |
4311 				PACKET3_DMA_DATA_SRC_SEL(2) |
4312 				PACKET3_DMA_DATA_ENGINE(0)));
4313 	amdgpu_ring_write(ring, 0);
4314 	amdgpu_ring_write(ring, 0);
4315 	amdgpu_ring_write(ring, 0);
4316 	amdgpu_ring_write(ring, 0);
4317 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4318 				adev->gds.gds_size);
4319 
4320 	amdgpu_ring_commit(ring);
4321 
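	/* busy-wait until the CP read pointer catches up with the write
	 * pointer, i.e. the GDS clear above has been consumed by the CP.
	 */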
4322 	for (i = 0; i < adev->usec_timeout; i++) {
4323 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4324 			break;
4325 		udelay(1);
4326 	}
4327 
4328 	if (i >= adev->usec_timeout)
4329 		r = -ETIMEDOUT;
4330 
4331 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4332 
4333 	return r;
4334 }
4335 
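/* EDC workaround: build a single IB that runs the VGPR init shader and two
 * SGPR init shaders as back-to-back compute dispatches, then wait for the
 * resulting fence.  Only runs when GFX RAS is enabled and the ring is ready.
 */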
4336 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4337 {
4338 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4339 	struct amdgpu_ib ib;
4340 	struct dma_fence *f = NULL;
4341 	int r, i;
4342 	unsigned total_size, vgpr_offset, sgpr_offset;
4343 	u64 gpu_addr;
4344 
4345 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4346 						adev->gfx.config.max_cu_per_sh *
4347 						adev->gfx.config.max_sh_per_se;
4348 	int sgpr_work_group_size = 5;
4349 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4350 	int vgpr_init_shader_size;
4351 	const u32 *vgpr_init_shader_ptr;
4352 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4353 
4354 	/* only support when RAS is enabled */
4355 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4356 		return 0;
4357 
4358 	/* bail if the compute ring is not ready */
4359 	if (!ring->sched.ready)
4360 		return 0;
4361 
4362 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4363 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4364 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4365 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4366 	} else {
4367 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4368 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4369 		vgpr_init_regs_ptr = vgpr_init_regs;
4370 	}
4371 
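	/* per dispatch: gpr_reg_size SET_SH_REG writes of 3 dwords each, a
	 * 4-dword COMPUTE_PGM_LO/HI write, a 5-dword DISPATCH_DIRECT and a
	 * 2-dword CS partial flush, times 4 bytes per dword.
	 */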
4372 	total_size =
4373 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4374 	total_size +=
4375 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4376 	total_size +=
4377 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4378 	total_size = ALIGN(total_size, 256);
4379 	vgpr_offset = total_size;
4380 	total_size += ALIGN(vgpr_init_shader_size, 256);
4381 	sgpr_offset = total_size;
4382 	total_size += sizeof(sgpr_init_compute_shader);
4383 
4384 	/* allocate an indirect buffer to put the commands in */
4385 	memset(&ib, 0, sizeof(ib));
4386 	r = amdgpu_ib_get(adev, NULL, total_size,
4387 					AMDGPU_IB_POOL_DIRECT, &ib);
4388 	if (r) {
4389 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4390 		return r;
4391 	}
4392 
4393 	/* load the compute shaders */
4394 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4395 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4396 
4397 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4398 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4399 
4400 	/* init the ib length to 0 */
4401 	ib.length_dw = 0;
4402 
4403 	/* VGPR */
4404 	/* write the register state for the compute dispatch */
4405 	for (i = 0; i < gpr_reg_size; i++) {
4406 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4407 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4408 								- PACKET3_SET_SH_REG_START;
4409 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4410 	}
4411 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4412 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4413 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4414 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4415 							- PACKET3_SET_SH_REG_START;
4416 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4417 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4418 
4419 	/* write dispatch packet */
4420 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4421 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4422 	ib.ptr[ib.length_dw++] = 1; /* y */
4423 	ib.ptr[ib.length_dw++] = 1; /* z */
4424 	ib.ptr[ib.length_dw++] =
4425 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4426 
4427 	/* write CS partial flush packet */
4428 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4429 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4430 
4431 	/* SGPR1 */
4432 	/* write the register state for the compute dispatch */
4433 	for (i = 0; i < gpr_reg_size; i++) {
4434 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4435 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4436 								- PACKET3_SET_SH_REG_START;
4437 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4438 	}
4439 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4440 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4441 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4442 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4443 							- PACKET3_SET_SH_REG_START;
4444 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4445 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4446 
4447 	/* write dispatch packet */
4448 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4449 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4450 	ib.ptr[ib.length_dw++] = 1; /* y */
4451 	ib.ptr[ib.length_dw++] = 1; /* z */
4452 	ib.ptr[ib.length_dw++] =
4453 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4454 
4455 	/* write CS partial flush packet */
4456 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4457 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4458 
4459 	/* SGPR2 */
4460 	/* write the register state for the compute dispatch */
4461 	for (i = 0; i < gpr_reg_size; i++) {
4462 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4463 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4464 								- PACKET3_SET_SH_REG_START;
4465 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4466 	}
4467 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4468 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4469 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4470 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4471 							- PACKET3_SET_SH_REG_START;
4472 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4473 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4474 
4475 	/* write dispatch packet */
4476 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4477 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4478 	ib.ptr[ib.length_dw++] = 1; /* y */
4479 	ib.ptr[ib.length_dw++] = 1; /* z */
4480 	ib.ptr[ib.length_dw++] =
4481 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4482 
4483 	/* write CS partial flush packet */
4484 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4485 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4486 
4487 	/* schedule the IB on the ring */
4488 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4489 	if (r) {
4490 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4491 		goto fail;
4492 	}
4493 
4494 	/* wait for the GPU to finish processing the IB */
4495 	r = dma_fence_wait(f, false);
4496 	if (r) {
4497 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4498 		goto fail;
4499 	}
4500 
4501 fail:
4502 	amdgpu_ib_free(adev, &ib, NULL);
4503 	dma_fence_put(f);
4504 
4505 	return r;
4506 }
4507 
4508 static int gfx_v9_0_early_init(void *handle)
4509 {
4510 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4511 
4512 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4513 
4514 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4515 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4516 		adev->gfx.num_gfx_rings = 0;
4517 	else
4518 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4519 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4520 					  AMDGPU_MAX_COMPUTE_RINGS);
4521 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4522 	gfx_v9_0_set_ring_funcs(adev);
4523 	gfx_v9_0_set_irq_funcs(adev);
4524 	gfx_v9_0_set_gds_init(adev);
4525 	gfx_v9_0_set_rlc_funcs(adev);
4526 
4527 	/* init rlcg reg access ctrl */
4528 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4529 
4530 	return gfx_v9_0_init_microcode(adev);
4531 }
4532 
4533 static int gfx_v9_0_ecc_late_init(void *handle)
4534 {
4535 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4536 	int r;
4537 
4538 	/*
4539 	 * Temporary workaround: on several cards the CP firmware fails to
4540 	 * update the read pointer while CPDMA writes the GDS clearing
4541 	 * operation during the suspend/resume sequence, so limit this
4542 	 * operation to the cold-boot sequence.
4543 	 */
4544 	if ((!adev->in_suspend) &&
4545 	    (adev->gds.gds_size)) {
4546 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4547 		if (r)
4548 			return r;
4549 	}
4550 
4551 	/* requires IBs so do in late init after IB pool is initialized */
4552 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4553 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4554 	else
4555 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4556 
4557 	if (r)
4558 		return r;
4559 
4560 	if (adev->gfx.ras &&
4561 	    adev->gfx.ras->enable_watchdog_timer)
4562 		adev->gfx.ras->enable_watchdog_timer(adev);
4563 
4564 	return 0;
4565 }
4566 
4567 static int gfx_v9_0_late_init(void *handle)
4568 {
4569 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4570 	int r;
4571 
4572 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4573 	if (r)
4574 		return r;
4575 
4576 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4577 	if (r)
4578 		return r;
4579 
4580 	r = gfx_v9_0_ecc_late_init(handle);
4581 	if (r)
4582 		return r;
4583 
4584 	return 0;
4585 }
4586 
4587 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4588 {
4589 	uint32_t rlc_setting;
4590 
4591 	/* if RLC is not enabled, do nothing */
4592 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4593 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4594 		return false;
4595 
4596 	return true;
4597 }
4598 
4599 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4600 {
4601 	uint32_t data;
4602 	unsigned i;
4603 
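	/* request RLC safe mode and poll until the RLC acknowledges by
	 * clearing the CMD bit.
	 */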
4604 	data = RLC_SAFE_MODE__CMD_MASK;
4605 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4606 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4607 
4608 	/* wait for RLC_SAFE_MODE */
4609 	for (i = 0; i < adev->usec_timeout; i++) {
4610 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4611 			break;
4612 		udelay(1);
4613 	}
4614 }
4615 
4616 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4617 {
4618 	uint32_t data;
4619 
4620 	data = RLC_SAFE_MODE__CMD_MASK;
4621 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4622 }
4623 
4624 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4625 						bool enable)
4626 {
4627 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4628 
4629 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4630 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4631 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4632 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4633 	} else {
4634 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4635 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4636 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4637 	}
4638 
4639 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4640 }
4641 
4642 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4643 						bool enable)
4644 {
4645 	/* TODO: double check if we need to perform under safe mode */
4646 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4647 
4648 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4649 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4650 	else
4651 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4652 
4653 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4654 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4655 	else
4656 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4657 
4658 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4659 }
4660 
4661 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4662 						      bool enable)
4663 {
4664 	uint32_t data, def;
4665 
4666 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4667 
4668 	/* It is disabled by HW by default */
4669 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4670 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4671 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4672 
4673 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4674 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4675 
4676 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4677 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4678 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4679 
4680 		/* only for Vega10 & Raven1 */
4681 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4682 
4683 		if (def != data)
4684 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4685 
4686 		/* MGLS is a global flag to control all MGLS in GFX */
4687 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4688 			/* 2 - RLC memory Light sleep */
4689 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4690 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4691 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4692 				if (def != data)
4693 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4694 			}
4695 			/* 3 - CP memory Light sleep */
4696 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4697 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4698 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4699 				if (def != data)
4700 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4701 			}
4702 		}
4703 	} else {
4704 		/* 1 - MGCG_OVERRIDE */
4705 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4706 
4707 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4708 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4709 
4710 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4711 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4712 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4713 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4714 
4715 		if (def != data)
4716 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4717 
4718 		/* 2 - disable MGLS in RLC */
4719 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4720 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4721 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4722 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4723 		}
4724 
4725 		/* 3 - disable MGLS in CP */
4726 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4727 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4728 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4729 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4730 		}
4731 	}
4732 
4733 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4734 }
4735 
4736 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4737 					   bool enable)
4738 {
4739 	uint32_t data, def;
4740 
4741 	if (!adev->gfx.num_gfx_rings)
4742 		return;
4743 
4744 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4745 
4746 	/* Enable 3D CGCG/CGLS */
4747 	if (enable) {
4748 		/* write cmd to clear cgcg/cgls ov */
4749 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4750 		/* unset CGCG override */
4751 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4752 		/* update CGCG and CGLS override bits */
4753 		if (def != data)
4754 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4755 
4756 		/* enable 3Dcgcg FSM(0x0000363f) */
4757 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4758 
4759 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4760 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4761 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4762 		else
4763 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4764 
4765 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4766 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4767 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4768 		if (def != data)
4769 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4770 
4771 		/* set IDLE_POLL_COUNT(0x00900100) */
4772 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4773 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4774 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4775 		if (def != data)
4776 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4777 	} else {
4778 		/* Disable CGCG/CGLS */
4779 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4780 		/* disable cgcg, cgls should be disabled */
4781 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4782 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4783 		/* disable cgcg and cgls in FSM */
4784 		if (def != data)
4785 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4786 	}
4787 
4788 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4789 }
4790 
4791 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4792 						      bool enable)
4793 {
4794 	uint32_t def, data;
4795 
4796 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4797 
4798 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4799 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4800 		/* unset CGCG override */
4801 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4802 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4803 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4804 		else
4805 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4806 		/* update CGCG and CGLS override bits */
4807 		if (def != data)
4808 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4809 
4810 		/* enable cgcg FSM(0x0000363F) */
4811 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4812 
4813 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4814 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4815 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4816 		else
4817 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4818 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4819 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4820 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4821 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4822 		if (def != data)
4823 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4824 
4825 		/* set IDLE_POLL_COUNT(0x00900100) */
4826 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4827 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4828 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4829 		if (def != data)
4830 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4831 	} else {
4832 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4833 		/* reset CGCG/CGLS bits */
4834 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4835 		/* disable cgcg and cgls in FSM */
4836 		if (def != data)
4837 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4838 	}
4839 
4840 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4841 }
4842 
4843 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4844 					    bool enable)
4845 {
4846 	if (enable) {
4847 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4848 		 * ===  MGCG + MGLS ===
4849 		 */
4850 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4851 		/* ===  CGCG /CGLS for GFX 3D Only === */
4852 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4853 		/* ===  CGCG + CGLS === */
4854 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4855 	} else {
4856 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4857 		 * ===  CGCG + CGLS ===
4858 		 */
4859 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4860 		/* ===  CGCG /CGLS for GFX 3D Only === */
4861 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4862 		/* ===  MGCG + MGLS === */
4863 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4864 	}
4865 	return 0;
4866 }
4867 
4868 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4869 {
4870 	u32 reg, data;
4871 
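	/* keep GFXOFF disabled around the RLC_SPM_MC_CNTL read-modify-write */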
4872 	amdgpu_gfx_off_ctrl(adev, false);
4873 
4874 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4875 	if (amdgpu_sriov_is_pp_one_vf(adev))
4876 		data = RREG32_NO_KIQ(reg);
4877 	else
4878 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4879 
4880 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4881 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4882 
4883 	if (amdgpu_sriov_is_pp_one_vf(adev))
4884 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4885 	else
4886 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4887 
4888 	amdgpu_gfx_off_ctrl(adev, true);
4889 }
4890 
4891 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4892 					uint32_t offset,
4893 					struct soc15_reg_rlcg *entries, int arr_size)
4894 {
4895 	int i;
4896 	uint32_t reg;
4897 
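	/* linear scan: rebuild each entry's absolute offset from its
	 * hwip/instance/segment base and compare it with the given offset.
	 */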
4898 	if (!entries)
4899 		return false;
4900 
4901 	for (i = 0; i < arr_size; i++) {
4902 		const struct soc15_reg_rlcg *entry;
4903 
4904 		entry = &entries[i];
4905 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4906 		if (offset == reg)
4907 			return true;
4908 	}
4909 
4910 	return false;
4911 }
4912 
4913 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4914 {
4915 	return gfx_v9_0_check_rlcg_range(adev, offset,
4916 					(void *)rlcg_access_gc_9_0,
4917 					ARRAY_SIZE(rlcg_access_gc_9_0));
4918 }
4919 
4920 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4921 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4922 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4923 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4924 	.init = gfx_v9_0_rlc_init,
4925 	.get_csb_size = gfx_v9_0_get_csb_size,
4926 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4927 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4928 	.resume = gfx_v9_0_rlc_resume,
4929 	.stop = gfx_v9_0_rlc_stop,
4930 	.reset = gfx_v9_0_rlc_reset,
4931 	.start = gfx_v9_0_rlc_start,
4932 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4933 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4934 };
4935 
4936 static int gfx_v9_0_set_powergating_state(void *handle,
4937 					  enum amd_powergating_state state)
4938 {
4939 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4940 	bool enable = (state == AMD_PG_STATE_GATE);
4941 
4942 	switch (adev->ip_versions[GC_HWIP][0]) {
4943 	case IP_VERSION(9, 2, 2):
4944 	case IP_VERSION(9, 1, 0):
4945 	case IP_VERSION(9, 3, 0):
4946 		if (!enable)
4947 			amdgpu_gfx_off_ctrl(adev, false);
4948 
4949 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4950 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4951 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4952 		} else {
4953 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4954 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4955 		}
4956 
4957 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4958 			gfx_v9_0_enable_cp_power_gating(adev, true);
4959 		else
4960 			gfx_v9_0_enable_cp_power_gating(adev, false);
4961 
4962 		/* update gfx cgpg state */
4963 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4964 
4965 		/* update mgcg state */
4966 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4967 
4968 		if (enable)
4969 			amdgpu_gfx_off_ctrl(adev, true);
4970 		break;
4971 	case IP_VERSION(9, 2, 1):
4972 		amdgpu_gfx_off_ctrl(adev, enable);
4973 		break;
4974 	default:
4975 		break;
4976 	}
4977 
4978 	return 0;
4979 }
4980 
4981 static int gfx_v9_0_set_clockgating_state(void *handle,
4982 					  enum amd_clockgating_state state)
4983 {
4984 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4985 
4986 	if (amdgpu_sriov_vf(adev))
4987 		return 0;
4988 
4989 	switch (adev->ip_versions[GC_HWIP][0]) {
4990 	case IP_VERSION(9, 0, 1):
4991 	case IP_VERSION(9, 2, 1):
4992 	case IP_VERSION(9, 4, 0):
4993 	case IP_VERSION(9, 2, 2):
4994 	case IP_VERSION(9, 1, 0):
4995 	case IP_VERSION(9, 4, 1):
4996 	case IP_VERSION(9, 3, 0):
4997 	case IP_VERSION(9, 4, 2):
4998 		gfx_v9_0_update_gfx_clock_gating(adev,
4999 						 state == AMD_CG_STATE_GATE);
5000 		break;
5001 	default:
5002 		break;
5003 	}
5004 	return 0;
5005 }
5006 
5007 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5008 {
5009 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5010 	int data;
5011 
5012 	if (amdgpu_sriov_vf(adev))
5013 		*flags = 0;
5014 
5015 	/* AMD_CG_SUPPORT_GFX_MGCG */
5016 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5017 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5018 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5019 
5020 	/* AMD_CG_SUPPORT_GFX_CGCG */
5021 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5022 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5023 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5024 
5025 	/* AMD_CG_SUPPORT_GFX_CGLS */
5026 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5027 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5028 
5029 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5030 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5031 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5032 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5033 
5034 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5035 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5036 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5037 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5038 
5039 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5040 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5041 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5042 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5043 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5044 
5045 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5046 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5047 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5048 	}
5049 }
5050 
5051 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5052 {
5053 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5054 }
5055 
5056 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5057 {
5058 	struct amdgpu_device *adev = ring->adev;
5059 	u64 wptr;
5060 
5061 	/* XXX check if swapping is necessary on BE */
5062 	if (ring->use_doorbell) {
5063 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5064 	} else {
5065 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5066 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5067 	}
5068 
5069 	return wptr;
5070 }
5071 
5072 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5073 {
5074 	struct amdgpu_device *adev = ring->adev;
5075 
5076 	if (ring->use_doorbell) {
5077 		/* XXX check if swapping is necessary on BE */
5078 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5079 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5080 	} else {
5081 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5082 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5083 	}
5084 }
5085 
5086 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5087 {
5088 	struct amdgpu_device *adev = ring->adev;
5089 	u32 ref_and_mask, reg_mem_engine;
5090 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5091 
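	/* pick the per-engine request/done bit: compute rings use the CP2/CP6
	 * masks shifted by pipe, the GFX ring uses CP0 and the PFP engine.
	 */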
5092 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5093 		switch (ring->me) {
5094 		case 1:
5095 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5096 			break;
5097 		case 2:
5098 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5099 			break;
5100 		default:
5101 			return;
5102 		}
5103 		reg_mem_engine = 0;
5104 	} else {
5105 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5106 		reg_mem_engine = 1; /* pfp */
5107 	}
5108 
5109 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5110 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5111 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5112 			      ref_and_mask, ref_and_mask, 0x20);
5113 }
5114 
5115 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5116 					struct amdgpu_job *job,
5117 					struct amdgpu_ib *ib,
5118 					uint32_t flags)
5119 {
5120 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5121 	u32 header, control = 0;
5122 
5123 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5124 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5125 	else
5126 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5127 
5128 	control |= ib->length_dw | (vmid << 24);
5129 
5130 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5131 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5132 
5133 		if (flags & AMDGPU_IB_PREEMPTED)
5134 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5135 
5136 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5137 			gfx_v9_0_ring_emit_de_meta(ring,
5138 						   (!amdgpu_sriov_vf(ring->adev) &&
5139 						   flags & AMDGPU_IB_PREEMPTED) ?
5140 						   true : false);
5141 	}
5142 
5143 	amdgpu_ring_write(ring, header);
5144 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5145 	amdgpu_ring_write(ring,
5146 #ifdef __BIG_ENDIAN
5147 		(2 << 0) |
5148 #endif
5149 		lower_32_bits(ib->gpu_addr));
5150 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5151 	amdgpu_ring_write(ring, control);
5152 }
5153 
5154 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5155 					  struct amdgpu_job *job,
5156 					  struct amdgpu_ib *ib,
5157 					  uint32_t flags)
5158 {
5159 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5160 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5161 
5162 	/* Currently, there is a high probability of a wave ID mismatch
5163 	 * between ME and GDS, leading to a HW deadlock, because ME generates
5164 	 * different wave IDs than the GDS expects. This situation happens
5165 	 * randomly when at least 5 compute pipes use GDS ordered append.
5166 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5167 	 * Those are probably bugs somewhere else in the kernel driver.
5168 	 *
5169 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5170 	 * GDS to 0 for this ring (me/pipe).
5171 	 */
5172 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5173 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5174 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5175 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5176 	}
5177 
5178 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5179 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5180 	amdgpu_ring_write(ring,
5181 #ifdef __BIG_ENDIAN
5182 				(2 << 0) |
5183 #endif
5184 				lower_32_bits(ib->gpu_addr));
5185 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5186 	amdgpu_ring_write(ring, control);
5187 }
5188 
5189 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5190 				     u64 seq, unsigned flags)
5191 {
5192 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5193 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5194 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5195 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5196 	uint32_t dw2 = 0;
5197 
5198 	/* RELEASE_MEM - flush caches, send int */
5199 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5200 
5201 	if (writeback) {
5202 		dw2 = EOP_TC_NC_ACTION_EN;
5203 	} else {
5204 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5205 				EOP_TC_MD_ACTION_EN;
5206 	}
5207 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5208 				EVENT_INDEX(5);
5209 	if (exec)
5210 		dw2 |= EOP_EXEC;
5211 
5212 	amdgpu_ring_write(ring, dw2);
5213 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5214 
5215 	/*
5216 	 * The address should be Qword aligned for a 64-bit write, and Dword
5217 	 * aligned when only the low 32 bits are written (data high is discarded).
5218 	 */
5219 	if (write64bit)
5220 		BUG_ON(addr & 0x7);
5221 	else
5222 		BUG_ON(addr & 0x3);
5223 	amdgpu_ring_write(ring, lower_32_bits(addr));
5224 	amdgpu_ring_write(ring, upper_32_bits(addr));
5225 	amdgpu_ring_write(ring, lower_32_bits(seq));
5226 	amdgpu_ring_write(ring, upper_32_bits(seq));
5227 	amdgpu_ring_write(ring, 0);
5228 }
5229 
5230 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5231 {
5232 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5233 	uint32_t seq = ring->fence_drv.sync_seq;
5234 	uint64_t addr = ring->fence_drv.gpu_addr;
5235 
5236 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5237 			      lower_32_bits(addr), upper_32_bits(addr),
5238 			      seq, 0xffffffff, 4);
5239 }
5240 
5241 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5242 					unsigned vmid, uint64_t pd_addr)
5243 {
5244 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5245 
5246 	/* compute doesn't have PFP */
5247 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5248 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5249 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5250 		amdgpu_ring_write(ring, 0x0);
5251 	}
5252 }
5253 
5254 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5255 {
5256 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5257 }
5258 
5259 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5260 {
5261 	u64 wptr;
5262 
5263 	/* XXX check if swapping is necessary on BE */
5264 	if (ring->use_doorbell)
5265 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5266 	else
5267 		BUG();
5268 	return wptr;
5269 }
5270 
5271 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5272 {
5273 	struct amdgpu_device *adev = ring->adev;
5274 
5275 	/* XXX check if swapping is necessary on BE */
5276 	if (ring->use_doorbell) {
5277 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5278 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5279 	} else {
5280 		BUG(); /* only DOORBELL method supported on gfx9 now */
5281 	}
5282 }
5283 
5284 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5285 					 u64 seq, unsigned int flags)
5286 {
5287 	struct amdgpu_device *adev = ring->adev;
5288 
5289 	/* we only allocate 32 bits for each fence sequence writeback address */
5290 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5291 
5292 	/* write fence seq to the "addr" */
5293 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5294 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5295 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5296 	amdgpu_ring_write(ring, lower_32_bits(addr));
5297 	amdgpu_ring_write(ring, upper_32_bits(addr));
5298 	amdgpu_ring_write(ring, lower_32_bits(seq));
5299 
5300 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5301 		/* set register to trigger INT */
5302 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5303 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5304 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5305 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5306 		amdgpu_ring_write(ring, 0);
5307 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5308 	}
5309 }
5310 
5311 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5312 {
5313 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5314 	amdgpu_ring_write(ring, 0);
5315 }
5316 
5317 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5318 {
5319 	struct amdgpu_device *adev = ring->adev;
5320 	struct v9_ce_ib_state ce_payload = {0};
5321 	uint64_t offset, ce_payload_gpu_addr;
5322 	void *ce_payload_cpu_addr;
5323 	int cnt;
5324 
5325 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5326 
5327 	if (ring->is_mes_queue) {
5328 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5329 				  gfx[0].gfx_meta_data) +
5330 			offsetof(struct v9_gfx_meta_data, ce_payload);
5331 		ce_payload_gpu_addr =
5332 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5333 		ce_payload_cpu_addr =
5334 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5335 	} else {
5336 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5337 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5338 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5339 	}
5340 
5341 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5342 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5343 				 WRITE_DATA_DST_SEL(8) |
5344 				 WR_CONFIRM) |
5345 				 WRITE_DATA_CACHE_POLICY(0));
5346 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5347 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5348 
5349 	if (resume)
5350 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5351 					   sizeof(ce_payload) >> 2);
5352 	else
5353 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5354 					   sizeof(ce_payload) >> 2);
5355 }
5356 
5357 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5358 {
5359 	int i, r = 0;
5360 	struct amdgpu_device *adev = ring->adev;
5361 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5362 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5363 	unsigned long flags;
5364 
5365 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5366 		return -EINVAL;
5367 
5368 	spin_lock_irqsave(&kiq->ring_lock, flags);
5369 
5370 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5371 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5372 		return -ENOMEM;
5373 	}
5374 
5375 	/* assert preemption condition */
5376 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5377 
5378 	ring->trail_seq += 1;
5379 	amdgpu_ring_alloc(ring, 13);
5380 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5381 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5382 	/* reset CP_VMID_PREEMPT after the trailing fence */
5383 	amdgpu_ring_emit_wreg(ring,
5384 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5385 			      0x0);
5386 
5387 	/* assert IB preemption, emit the trailing fence */
5388 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5389 				   ring->trail_fence_gpu_addr,
5390 				   ring->trail_seq);
5391 
5392 	amdgpu_ring_commit(kiq_ring);
5393 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5394 
5395 	/* poll the trailing fence */
5396 	for (i = 0; i < adev->usec_timeout; i++) {
5397 		if (ring->trail_seq ==
5398 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5399 			break;
5400 		udelay(1);
5401 	}
5402 
5403 	if (i >= adev->usec_timeout) {
5404 		r = -EINVAL;
5405 		DRM_WARN("ring %d timed out waiting for IB preemption\n", ring->idx);
5406 	}
5407 
5408 	amdgpu_ring_commit(ring);
5409 
5410 	/* deassert preemption condition */
5411 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5412 	return r;
5413 }
5414 
5415 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5416 {
5417 	struct amdgpu_device *adev = ring->adev;
5418 	struct v9_de_ib_state de_payload = {0};
5419 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5420 	void *de_payload_cpu_addr;
5421 	int cnt;
5422 
5423 	if (ring->is_mes_queue) {
5424 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5425 				  gfx[0].gfx_meta_data) +
5426 			offsetof(struct v9_gfx_meta_data, de_payload);
5427 		de_payload_gpu_addr =
5428 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5429 		de_payload_cpu_addr =
5430 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5431 
5432 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5433 				  gfx[0].gds_backup) +
5434 			offsetof(struct v9_gfx_meta_data, de_payload);
5435 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5436 	} else {
5437 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5438 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5439 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5440 
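		/* place the GDS backup at the page-aligned tail of the CSA */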
5441 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5442 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5443 				 PAGE_SIZE);
5444 	}
5445 
5446 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5447 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5448 
5449 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5450 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5451 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5452 				 WRITE_DATA_DST_SEL(8) |
5453 				 WR_CONFIRM) |
5454 				 WRITE_DATA_CACHE_POLICY(0));
5455 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5456 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5457 
5458 	if (resume)
5459 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5460 					   sizeof(de_payload) >> 2);
5461 	else
5462 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5463 					   sizeof(de_payload) >> 2);
5464 }
5465 
5466 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5467 				   bool secure)
5468 {
5469 	uint32_t v = secure ? FRAME_TMZ : 0;
5470 
5471 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5472 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5473 }
5474 
5475 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5476 {
5477 	uint32_t dw2 = 0;
5478 
5479 	gfx_v9_0_ring_emit_ce_meta(ring,
5480 				   (!amdgpu_sriov_vf(ring->adev) &&
5481 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5482 
5483 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5484 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5485 		/* set load_global_config & load_global_uconfig */
5486 		dw2 |= 0x8001;
5487 		/* set load_cs_sh_regs */
5488 		dw2 |= 0x01000000;
5489 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5490 		dw2 |= 0x10002;
5491 
5492 		/* set load_ce_ram if a preamble is presented */
5493 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5494 			dw2 |= 0x10000000;
5495 	} else {
5496 		/* still load_ce_ram if this is the first time a preamble is presented,
5497 		 * even though no context switch happens.
5498 		 */
5499 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5500 			dw2 |= 0x10000000;
5501 	}
5502 
5503 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5504 	amdgpu_ring_write(ring, dw2);
5505 	amdgpu_ring_write(ring, 0);
5506 }
5507 
5508 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5509 {
5510 	unsigned ret;
5511 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5512 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5513 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5514 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5515 	ret = ring->wptr & ring->buf_mask;
5516 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5517 	return ret;
5518 }
5519 
5520 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5521 {
5522 	unsigned cur;
5523 	BUG_ON(offset > ring->buf_mask);
5524 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5525 
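	/* patch the placeholder with the number of dwords the CP must skip
	 * when the condition is false, accounting for ring buffer wrap-around.
	 */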
5526 	cur = (ring->wptr - 1) & ring->buf_mask;
5527 	if (likely(cur > offset))
5528 		ring->ring[offset] = cur - offset;
5529 	else
5530 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5531 }
5532 
5533 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5534 				    uint32_t reg_val_offs)
5535 {
5536 	struct amdgpu_device *adev = ring->adev;
5537 
5538 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5539 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5540 				(5 << 8) |	/* dst: memory */
5541 				(1 << 20));	/* write confirm */
5542 	amdgpu_ring_write(ring, reg);
5543 	amdgpu_ring_write(ring, 0);
5544 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5545 				reg_val_offs * 4));
5546 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5547 				reg_val_offs * 4));
5548 }
5549 
5550 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5551 				    uint32_t val)
5552 {
5553 	uint32_t cmd = 0;
5554 
5555 	switch (ring->funcs->type) {
5556 	case AMDGPU_RING_TYPE_GFX:
5557 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5558 		break;
5559 	case AMDGPU_RING_TYPE_KIQ:
5560 		cmd = (1 << 16); /* no inc addr */
5561 		break;
5562 	default:
5563 		cmd = WR_CONFIRM;
5564 		break;
5565 	}
5566 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5567 	amdgpu_ring_write(ring, cmd);
5568 	amdgpu_ring_write(ring, reg);
5569 	amdgpu_ring_write(ring, 0);
5570 	amdgpu_ring_write(ring, val);
5571 }
5572 
5573 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5574 					uint32_t val, uint32_t mask)
5575 {
5576 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5577 }
5578 
5579 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5580 						  uint32_t reg0, uint32_t reg1,
5581 						  uint32_t ref, uint32_t mask)
5582 {
5583 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5584 	struct amdgpu_device *adev = ring->adev;
5585 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5586 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5587 
5588 	if (fw_version_ok)
5589 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5590 				      ref, mask, 0x20);
5591 	else
5592 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5593 							   ref, mask);
5594 }
5595 
5596 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5597 {
5598 	struct amdgpu_device *adev = ring->adev;
5599 	uint32_t value = 0;
5600 
5601 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5602 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5603 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5604 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5605 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5606 }
5607 
5608 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5609 						 enum amdgpu_interrupt_state state)
5610 {
5611 	switch (state) {
5612 	case AMDGPU_IRQ_STATE_DISABLE:
5613 	case AMDGPU_IRQ_STATE_ENABLE:
5614 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5615 			       TIME_STAMP_INT_ENABLE,
5616 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5617 		break;
5618 	default:
5619 		break;
5620 	}
5621 }
5622 
5623 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5624 						     int me, int pipe,
5625 						     enum amdgpu_interrupt_state state)
5626 {
5627 	u32 mec_int_cntl, mec_int_cntl_reg;
5628 
5629 	/*
5630 	 * amdgpu controls only the first MEC. That's why this function only
5631 	 * handles the setting of interrupts for this specific MEC. All other
5632 	 * pipes' interrupts are set by amdkfd.
5633 	 */
5634 
5635 	if (me == 1) {
5636 		switch (pipe) {
5637 		case 0:
5638 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5639 			break;
5640 		case 1:
5641 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5642 			break;
5643 		case 2:
5644 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5645 			break;
5646 		case 3:
5647 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5648 			break;
5649 		default:
5650 			DRM_DEBUG("invalid pipe %d\n", pipe);
5651 			return;
5652 		}
5653 	} else {
5654 		DRM_DEBUG("invalid me %d\n", me);
5655 		return;
5656 	}
5657 
5658 	switch (state) {
5659 	case AMDGPU_IRQ_STATE_DISABLE:
5660 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5661 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5662 					     TIME_STAMP_INT_ENABLE, 0);
5663 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5664 		break;
5665 	case AMDGPU_IRQ_STATE_ENABLE:
5666 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5667 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5668 					     TIME_STAMP_INT_ENABLE, 1);
5669 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5670 		break;
5671 	default:
5672 		break;
5673 	}
5674 }
5675 
5676 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5677 					     struct amdgpu_irq_src *source,
5678 					     unsigned type,
5679 					     enum amdgpu_interrupt_state state)
5680 {
5681 	switch (state) {
5682 	case AMDGPU_IRQ_STATE_DISABLE:
5683 	case AMDGPU_IRQ_STATE_ENABLE:
5684 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5685 			       PRIV_REG_INT_ENABLE,
5686 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5687 		break;
5688 	default:
5689 		break;
5690 	}
5691 
5692 	return 0;
5693 }
5694 
5695 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5696 					      struct amdgpu_irq_src *source,
5697 					      unsigned type,
5698 					      enum amdgpu_interrupt_state state)
5699 {
5700 	switch (state) {
5701 	case AMDGPU_IRQ_STATE_DISABLE:
5702 	case AMDGPU_IRQ_STATE_ENABLE:
5703 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5704 			       PRIV_INSTR_INT_ENABLE,
5705 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5706 		break;
5707 	default:
5708 		break;
5709 	}
5710 
5711 	return 0;
5712 }
5713 
5714 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5715 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5716 			CP_ECC_ERROR_INT_ENABLE, 1)
5717 
5718 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5719 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5720 			CP_ECC_ERROR_INT_ENABLE, 0)
5721 
5722 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5723 					      struct amdgpu_irq_src *source,
5724 					      unsigned type,
5725 					      enum amdgpu_interrupt_state state)
5726 {
5727 	switch (state) {
5728 	case AMDGPU_IRQ_STATE_DISABLE:
5729 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5730 				CP_ECC_ERROR_INT_ENABLE, 0);
5731 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5732 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5733 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5734 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5735 		break;
5736 
5737 	case AMDGPU_IRQ_STATE_ENABLE:
5738 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5739 				CP_ECC_ERROR_INT_ENABLE, 1);
5740 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5741 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5742 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5743 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5744 		break;
5745 	default:
5746 		break;
5747 	}
5748 
5749 	return 0;
5750 }
5751 
5752 
5753 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5754 					    struct amdgpu_irq_src *src,
5755 					    unsigned type,
5756 					    enum amdgpu_interrupt_state state)
5757 {
5758 	switch (type) {
5759 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5760 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5761 		break;
5762 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5763 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5764 		break;
5765 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5766 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5767 		break;
5768 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5769 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5770 		break;
5771 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5772 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5773 		break;
5774 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5775 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5776 		break;
5777 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5778 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5779 		break;
5780 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5781 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5782 		break;
5783 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5784 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5785 		break;
5786 	default:
5787 		break;
5788 	}
5789 	return 0;
5790 }
5791 
5792 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5793 			    struct amdgpu_irq_src *source,
5794 			    struct amdgpu_iv_entry *entry)
5795 {
5796 	int i;
5797 	u8 me_id, pipe_id, queue_id;
5798 	struct amdgpu_ring *ring;
5799 
5800 	DRM_DEBUG("IH: CP EOP\n");
5801 	me_id = (entry->ring_id & 0x0c) >> 2;
5802 	pipe_id = (entry->ring_id & 0x03) >> 0;
5803 	queue_id = (entry->ring_id & 0x70) >> 4;
5804 
5805 	switch (me_id) {
5806 	case 0:
5807 		if (adev->gfx.num_gfx_rings &&
5808 		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5809 			/* Fence signals are handled on the software rings */
5810 			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5811 				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5812 		}
5813 		break;
5814 	case 1:
5815 	case 2:
5816 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5817 			ring = &adev->gfx.compute_ring[i];
5818 			/* Per-queue interrupt is supported for MEC starting from VI.
5819 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5820 			 */
5821 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5822 				amdgpu_fence_process(ring);
5823 		}
5824 		break;
5825 	}
5826 	return 0;
5827 }
5828 
5829 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5830 			   struct amdgpu_iv_entry *entry)
5831 {
5832 	u8 me_id, pipe_id, queue_id;
5833 	struct amdgpu_ring *ring;
5834 	int i;
5835 
5836 	me_id = (entry->ring_id & 0x0c) >> 2;
5837 	pipe_id = (entry->ring_id & 0x03) >> 0;
5838 	queue_id = (entry->ring_id & 0x70) >> 4;
5839 
5840 	switch (me_id) {
5841 	case 0:
5842 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5843 		break;
5844 	case 1:
5845 	case 2:
5846 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5847 			ring = &adev->gfx.compute_ring[i];
5848 			if (ring->me == me_id && ring->pipe == pipe_id &&
5849 			    ring->queue == queue_id)
5850 				drm_sched_fault(&ring->sched);
5851 		}
5852 		break;
5853 	}
5854 }
5855 
5856 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5857 				 struct amdgpu_irq_src *source,
5858 				 struct amdgpu_iv_entry *entry)
5859 {
5860 	DRM_ERROR("Illegal register access in command stream\n");
5861 	gfx_v9_0_fault(adev, entry);
5862 	return 0;
5863 }
5864 
5865 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5866 				  struct amdgpu_irq_src *source,
5867 				  struct amdgpu_iv_entry *entry)
5868 {
5869 	DRM_ERROR("Illegal instruction in command stream\n");
5870 	gfx_v9_0_fault(adev, entry);
5871 	return 0;
5872 }
5873 
5874 
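/* Per-sub-block EDC counter fields: SEC (single-error corrected) and DED
 * (double-error detected) masks; entries that only expose a single-error
 * detected (SED) style counter leave the DED mask/shift as 0, 0.
 */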
5875 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5876 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5877 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5878 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5879 	},
5880 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5881 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5882 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5883 	},
5884 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5885 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5886 	  0, 0
5887 	},
5888 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5889 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5890 	  0, 0
5891 	},
5892 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5893 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5894 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5895 	},
5896 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5897 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5898 	  0, 0
5899 	},
5900 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5901 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5902 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5903 	},
5904 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5905 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5906 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5907 	},
5908 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5909 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5910 	  0, 0
5911 	},
5912 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5913 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5914 	  0, 0
5915 	},
5916 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5917 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5918 	  0, 0
5919 	},
5920 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5921 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5922 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5923 	},
5924 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5925 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5926 	  0, 0
5927 	},
5928 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5929 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5930 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5931 	},
5932 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5933 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5934 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5935 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5936 	},
5937 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5938 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5939 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5940 	  0, 0
5941 	},
5942 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5943 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5944 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5945 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5946 	},
5947 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5948 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5949 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5950 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5951 	},
5952 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5953 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5954 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5955 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5956 	},
5957 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5958 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5959 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5960 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5961 	},
5962 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5963 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5964 	  0, 0
5965 	},
5966 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5967 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5968 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5969 	},
5970 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5971 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5972 	  0, 0
5973 	},
5974 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5975 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5976 	  0, 0
5977 	},
5978 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5979 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5980 	  0, 0
5981 	},
5982 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5983 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5984 	  0, 0
5985 	},
5986 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5987 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5988 	  0, 0
5989 	},
5990 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5991 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5992 	  0, 0
5993 	},
5994 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5995 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5996 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5997 	},
5998 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5999 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6000 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6001 	},
6002 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6003 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6004 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6005 	},
6006 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6007 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6008 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6009 	},
6010 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6011 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6012 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6013 	},
6014 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6015 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6016 	  0, 0
6017 	},
6018 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6019 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6020 	  0, 0
6021 	},
6022 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6023 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6024 	  0, 0
6025 	},
6026 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6027 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6028 	  0, 0
6029 	},
6030 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6031 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6032 	  0, 0
6033 	},
6034 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6035 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6036 	  0, 0
6037 	},
6038 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6039 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6040 	  0, 0
6041 	},
6042 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6043 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6044 	  0, 0
6045 	},
6046 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6047 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6048 	  0, 0
6049 	},
6050 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6051 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6052 	  0, 0
6053 	},
6054 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6055 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6056 	  0, 0
6057 	},
6058 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6059 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6060 	  0, 0
6061 	},
6062 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6063 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6064 	  0, 0
6065 	},
6066 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6067 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6068 	  0, 0
6069 	},
6070 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6071 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6072 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6073 	},
6074 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6075 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6076 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6077 	},
6078 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6079 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6080 	  0, 0
6081 	},
6082 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6083 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6084 	  0, 0
6085 	},
6086 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6087 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6088 	  0, 0
6089 	},
6090 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6091 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6092 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6093 	},
6094 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6095 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6096 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6097 	},
6098 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6099 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6100 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6101 	},
6102 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6103 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6104 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6105 	},
6106 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6107 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6108 	  0, 0
6109 	},
6110 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6111 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6112 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6113 	},
6114 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6115 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6116 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6117 	},
6118 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6119 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6120 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6121 	},
6122 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6123 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6124 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6125 	},
6126 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6127 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6128 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6129 	},
6130 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6131 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6132 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6133 	},
6134 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6135 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6136 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6137 	},
6138 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6139 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6140 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6141 	},
6142 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6143 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6144 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6145 	},
6146 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6147 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6148 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6149 	},
6150 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6151 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6152 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6153 	},
6154 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6155 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6156 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6157 	},
6158 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6159 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6160 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6161 	},
6162 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6163 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6164 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6165 	},
6166 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6167 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6168 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6169 	},
6170 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6171 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6172 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6173 	},
6174 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6175 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6176 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6177 	},
6178 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6179 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6180 	  0, 0
6181 	},
6182 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6183 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6184 	  0, 0
6185 	},
6186 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6187 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6188 	  0, 0
6189 	},
6190 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6191 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6192 	  0, 0
6193 	},
6194 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6195 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6196 	  0, 0
6197 	},
6198 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6199 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6200 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6201 	},
6202 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6203 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6204 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6205 	},
6206 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6207 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6208 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6209 	},
6210 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6211 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6212 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6213 	},
6214 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6215 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6216 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6217 	},
6218 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6219 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6220 	  0, 0
6221 	},
6222 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6223 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6224 	  0, 0
6225 	},
6226 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6227 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6228 	  0, 0
6229 	},
6230 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6231 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6232 	  0, 0
6233 	},
6234 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6235 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6236 	  0, 0
6237 	},
6238 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6239 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6240 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6241 	},
6242 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6243 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6244 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6245 	},
6246 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6247 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6248 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6249 	},
6250 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6251 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6252 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6253 	},
6254 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6255 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6256 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6257 	},
6258 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6259 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6260 	  0, 0
6261 	},
6262 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6263 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6264 	  0, 0
6265 	},
6266 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6267 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6268 	  0, 0
6269 	},
6270 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6271 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6272 	  0, 0
6273 	},
6274 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6275 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6276 	  0, 0
6277 	},
6278 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6279 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6280 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6281 	},
6282 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6283 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6284 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6285 	},
6286 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6287 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6288 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6289 	},
6290 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6291 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6292 	  0, 0
6293 	},
6294 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6295 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6296 	  0, 0
6297 	},
6298 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6299 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6300 	  0, 0
6301 	},
6302 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6303 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6304 	  0, 0
6305 	},
6306 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6307 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6308 	  0, 0
6309 	},
6310 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6311 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6312 	  0, 0
6313 	}
6314 };
6315 
6316 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6317 				     void *inject_if)
6318 {
6319 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6320 	int ret;
6321 	struct ta_ras_trigger_error_input block_info = { 0 };
6322 
6323 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6324 		return -EINVAL;
6325 
6326 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6327 		return -EINVAL;
6328 
6329 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6330 		return -EPERM;
6331 
6332 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6333 	      info->head.type)) {
6334 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6335 			ras_gfx_subblocks[info->head.sub_block_index].name,
6336 			info->head.type);
6337 		return -EPERM;
6338 	}
6339 
6340 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6341 	      info->head.type)) {
6342 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6343 			ras_gfx_subblocks[info->head.sub_block_index].name,
6344 			info->head.type);
6345 		return -EPERM;
6346 	}
6347 
6348 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6349 	block_info.sub_block_index =
6350 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6351 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6352 	block_info.address = info->address;
6353 	block_info.value = info->value;
6354 
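	/* Serialize with other GRBM index users while the RAS TA performs
	 * the injection.
	 */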
6355 	mutex_lock(&adev->grbm_idx_mutex);
6356 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6357 	mutex_unlock(&adev->grbm_idx_mutex);
6358 
6359 	return ret;
6360 }
6361 
6362 static const char *vml2_mems[] = {
6363 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6364 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6365 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6366 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6367 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6368 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6369 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6370 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6371 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6372 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6373 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6374 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6375 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6376 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6377 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6378 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6379 };
6380 
6381 static const char *vml2_walker_mems[] = {
6382 	"UTC_VML2_CACHE_PDE0_MEM0",
6383 	"UTC_VML2_CACHE_PDE0_MEM1",
6384 	"UTC_VML2_CACHE_PDE1_MEM0",
6385 	"UTC_VML2_CACHE_PDE1_MEM1",
6386 	"UTC_VML2_CACHE_PDE2_MEM0",
6387 	"UTC_VML2_CACHE_PDE2_MEM1",
6388 	"UTC_VML2_RDIF_LOG_FIFO",
6389 };
6390 
6391 static const char *atc_l2_cache_2m_mems[] = {
6392 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6393 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6394 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6395 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6396 };
6397 
6398 static const char *atc_l2_cache_4k_mems[] = {
6399 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6400 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6401 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6402 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6403 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6404 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6405 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6406 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6407 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6408 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6409 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6410 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6411 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6412 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6413 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6414 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6415 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6416 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6417 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6418 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6419 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6420 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6421 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6422 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6423 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6424 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6425 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6426 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6427 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6428 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6429 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6430 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6431 };
6432 
6433 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6434 					 struct ras_err_data *err_data)
6435 {
6436 	uint32_t i, data;
6437 	uint32_t sec_count, ded_count;
6438 
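	/* Zero the UTC (VML2/ATC L2) EDC count registers before sampling
	 * the per-instance values below.
	 */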
6439 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6440 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6441 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6442 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6443 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6444 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6445 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6446 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6447 
6448 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6449 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6450 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6451 
6452 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6453 		if (sec_count) {
6454 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6455 				"SEC %d\n", i, vml2_mems[i], sec_count);
6456 			err_data->ce_count += sec_count;
6457 		}
6458 
6459 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6460 		if (ded_count) {
6461 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6462 				"DED %d\n", i, vml2_mems[i], ded_count);
6463 			err_data->ue_count += ded_count;
6464 		}
6465 	}
6466 
6467 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6468 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6469 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6470 
6471 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6472 						SEC_COUNT);
6473 		if (sec_count) {
6474 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6475 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6476 			err_data->ce_count += sec_count;
6477 		}
6478 
6479 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6480 						DED_COUNT);
6481 		if (ded_count) {
6482 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6483 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6484 			err_data->ue_count += ded_count;
6485 		}
6486 	}
6487 
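	/* The ATC L2 EDC counts are extracted with open-coded masks:
	 * bits [14:13] hold the SEC count and bits [16:15] the DED count.
	 */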
6488 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6489 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6490 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6491 
6492 		sec_count = (data & 0x00006000L) >> 0xd;
6493 		if (sec_count) {
6494 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6495 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6496 				sec_count);
6497 			err_data->ce_count += sec_count;
6498 		}
6499 	}
6500 
6501 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6502 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6503 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6504 
6505 		sec_count = (data & 0x00006000L) >> 0xd;
6506 		if (sec_count) {
6507 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6508 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6509 				sec_count);
6510 			err_data->ce_count += sec_count;
6511 		}
6512 
6513 		ded_count = (data & 0x00018000L) >> 0xf;
6514 		if (ded_count) {
6515 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6516 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6517 				ded_count);
6518 			err_data->ue_count += ded_count;
6519 		}
6520 	}
6521 
6522 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6523 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6524 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6525 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6526 
6527 	return 0;
6528 }
6529 
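/* Decode one EDC counter value: for every RAS field that maps onto this
 * register, extract the SEC and DED counts and accumulate them into the
 * caller-provided totals.
 */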
6530 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6531 	const struct soc15_reg_entry *reg,
6532 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6533 	uint32_t *sec_count, uint32_t *ded_count)
6534 {
6535 	uint32_t i;
6536 	uint32_t sec_cnt, ded_cnt;
6537 
6538 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6539 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6540 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6541 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6542 			continue;
6543 
6544 		sec_cnt = (value &
6545 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6546 				gfx_v9_0_ras_fields[i].sec_count_shift;
6547 		if (sec_cnt) {
6548 			dev_info(adev->dev, "GFX SubBlock %s, "
6549 				"Instance[%d][%d], SEC %d\n",
6550 				gfx_v9_0_ras_fields[i].name,
6551 				se_id, inst_id,
6552 				sec_cnt);
6553 			*sec_count += sec_cnt;
6554 		}
6555 
6556 		ded_cnt = (value &
6557 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6558 				gfx_v9_0_ras_fields[i].ded_count_shift;
6559 		if (ded_cnt) {
6560 			dev_info(adev->dev, "GFX SubBlock %s, "
6561 				"Instance[%d][%d], DED %d\n",
6562 				gfx_v9_0_ras_fields[i].name,
6563 				se_id, inst_id,
6564 				ded_cnt);
6565 			*ded_count += ded_cnt;
6566 		}
6567 	}
6568 
6569 	return 0;
6570 }
6571 
6572 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6573 {
6574 	int i, j, k;
6575 
6576 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6577 		return;
6578 
6579 	/* read back registers to clear the counters */
6580 	mutex_lock(&adev->grbm_idx_mutex);
6581 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6582 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6583 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6584 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
6585 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6586 			}
6587 		}
6588 	}
6589 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6590 	mutex_unlock(&adev->grbm_idx_mutex);
6591 
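	/* Zero and read back the UTC (VML2/ATC L2) EDC counters for every
	 * instance as well.
	 */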
6592 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6593 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6594 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6595 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6596 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6597 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6598 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6599 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6600 
6601 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6602 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6603 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6604 	}
6605 
6606 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6607 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6608 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6609 	}
6610 
6611 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6612 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6613 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6614 	}
6615 
6616 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6617 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6618 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6619 	}
6620 
6621 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6622 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6623 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6624 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6625 }
6626 
6627 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6628 					  void *ras_error_status)
6629 {
6630 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6631 	uint32_t sec_count = 0, ded_count = 0;
6632 	uint32_t i, j, k;
6633 	uint32_t reg_value;
6634 
6635 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6636 		return;
6637 
6638 	err_data->ue_count = 0;
6639 	err_data->ce_count = 0;
6640 
6641 	mutex_lock(&adev->grbm_idx_mutex);
6642 
6643 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6644 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6645 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6646 				amdgpu_gfx_select_se_sh(adev, j, 0, k);
6647 				reg_value =
6648 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6649 				if (reg_value)
6650 					gfx_v9_0_ras_error_count(adev,
6651 						&gfx_v9_0_edc_counter_regs[i],
6652 						j, k, reg_value,
6653 						&sec_count, &ded_count);
6654 			}
6655 		}
6656 	}
6657 
6658 	err_data->ce_count += sec_count;
6659 	err_data->ue_count += ded_count;
6660 
6661 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6662 	mutex_unlock(&adev->grbm_idx_mutex);
6663 
6664 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6665 }
6666 
6667 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6668 {
6669 	const unsigned int cp_coher_cntl =
6670 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6671 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6672 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6673 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6674 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6675 
6676 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
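	/* COHER_BASE = 0 with the maximum COHER_SIZE effectively requests a
	 * full-range operation.
	 */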
6677 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6678 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6679 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6680 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6681 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6682 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6683 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6684 }
6685 
6686 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6687 					uint32_t pipe, bool enable)
6688 {
6689 	struct amdgpu_device *adev = ring->adev;
6690 	uint32_t val;
6691 	uint32_t wcl_cs_reg;
6692 
6693 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
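	/* enable == true throttles the pipe by writing the smallest multiplier
	 * (0x1); enable == false restores the register default.
	 */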
6694 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6695 
6696 	switch (pipe) {
6697 	case 0:
6698 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6699 		break;
6700 	case 1:
6701 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6702 		break;
6703 	case 2:
6704 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6705 		break;
6706 	case 3:
6707 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6708 		break;
6709 	default:
6710 		DRM_DEBUG("invalid pipe %d\n", pipe);
6711 		return;
6712 	}
6713 
6714 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6715 }
6716 
6717 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6718 {
6719 	struct amdgpu_device *adev = ring->adev;
6720 	uint32_t val;
6721 	int i;
6722 
6723 
6724 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
6725 	 * the number of gfx waves. Writing 0x1f (5 of the 7 bits set) makes sure
6726 	 * gfx only gets around 25% of the gpu resources.
6727 	 */
6728 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6729 	amdgpu_ring_emit_wreg(ring,
6730 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6731 			      val);
6732 
6733 	/* Restrict waves for normal/low priority compute queues as well
6734 	 * to get the best QoS for high priority compute jobs.
6735 	 *
6736 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6737 	 */
6738 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6739 		if (i != ring->pipe)
6740 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6741 
6742 	}
6743 }
6744 
6745 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6746 	.name = "gfx_v9_0",
6747 	.early_init = gfx_v9_0_early_init,
6748 	.late_init = gfx_v9_0_late_init,
6749 	.sw_init = gfx_v9_0_sw_init,
6750 	.sw_fini = gfx_v9_0_sw_fini,
6751 	.hw_init = gfx_v9_0_hw_init,
6752 	.hw_fini = gfx_v9_0_hw_fini,
6753 	.suspend = gfx_v9_0_suspend,
6754 	.resume = gfx_v9_0_resume,
6755 	.is_idle = gfx_v9_0_is_idle,
6756 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6757 	.soft_reset = gfx_v9_0_soft_reset,
6758 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6759 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6760 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6761 };
6762 
6763 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6764 	.type = AMDGPU_RING_TYPE_GFX,
6765 	.align_mask = 0xff,
6766 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6767 	.support_64bit_ptrs = true,
6768 	.secure_submission_supported = true,
6769 	.vmhub = AMDGPU_GFXHUB_0,
6770 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6771 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6772 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6773 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6774 		5 +  /* COND_EXEC */
6775 		7 +  /* PIPELINE_SYNC */
6776 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6777 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6778 		2 + /* VM_FLUSH */
6779 		8 +  /* FENCE for VM_FLUSH */
6780 		20 + /* GDS switch */
6781 		4 + /* double SWITCH_BUFFER,
6782 		4 + /* double SWITCH_BUFFER,
6783 		     * the first COND_EXEC jumps to the place just
6784 		     * prior to this double SWITCH_BUFFER */
6785 		7 +	 /*	HDP_flush */
6786 		4 +	 /*	VGT_flush */
6787 		14 + /*	CE_META */
6788 		31 + /*	DE_META */
6789 		3 + /* CNTX_CTRL */
6790 		5 + /* HDP_INVL */
6791 		8 + 8 + /* FENCE x2 */
6792 		2 + /* SWITCH_BUFFER */
6793 		7, /* gfx_v9_0_emit_mem_sync */
6794 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6795 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6796 	.emit_fence = gfx_v9_0_ring_emit_fence,
6797 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6798 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6799 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6800 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6801 	.test_ring = gfx_v9_0_ring_test_ring,
6802 	.test_ib = gfx_v9_0_ring_test_ib,
6803 	.insert_nop = amdgpu_ring_insert_nop,
6804 	.pad_ib = amdgpu_ring_generic_pad_ib,
6805 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6806 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6807 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6808 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6809 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
6810 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6811 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6812 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6813 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6814 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6815 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6816 };
6817 
6818 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6819 	.type = AMDGPU_RING_TYPE_GFX,
6820 	.align_mask = 0xff,
6821 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6822 	.support_64bit_ptrs = true,
6823 	.secure_submission_supported = true,
6824 	.vmhub = AMDGPU_GFXHUB_0,
6825 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6826 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6827 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6828 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6829 		5 +  /* COND_EXEC */
6830 		7 +  /* PIPELINE_SYNC */
6831 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6832 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6833 		2 + /* VM_FLUSH */
6834 		8 +  /* FENCE for VM_FLUSH */
6835 		20 + /* GDS switch */
6836 		4 + /* double SWITCH_BUFFER,
6837 		     * the first COND_EXEC jump to the place just
6838 		     * prior to this double SWITCH_BUFFER
6839 		     */
6840 		5 + /* COND_EXEC */
6841 		7 +	 /*	HDP_flush */
6842 		4 +	 /*	VGT_flush */
6843 		14 + /*	CE_META */
6844 		31 + /*	DE_META */
6845 		3 + /* CNTX_CTRL */
6846 		5 + /* HDP_INVL */
6847 		8 + 8 + /* FENCE x2 */
6848 		2 + /* SWITCH_BUFFER */
6849 		7, /* gfx_v9_0_emit_mem_sync */
6850 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6851 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6852 	.emit_fence = gfx_v9_0_ring_emit_fence,
6853 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6854 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6855 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6856 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6857 	.test_ring = gfx_v9_0_ring_test_ring,
6858 	.test_ib = gfx_v9_0_ring_test_ib,
6859 	.insert_nop = amdgpu_sw_ring_insert_nop,
6860 	.pad_ib = amdgpu_ring_generic_pad_ib,
6861 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6862 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6863 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6864 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6865 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6866 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6867 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6868 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6869 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6870 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6871 };
6872 
6873 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6874 	.type = AMDGPU_RING_TYPE_COMPUTE,
6875 	.align_mask = 0xff,
6876 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6877 	.support_64bit_ptrs = true,
6878 	.vmhub = AMDGPU_GFXHUB_0,
6879 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6880 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6881 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6882 	.emit_frame_size =
6883 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6884 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6885 		5 + /* hdp invalidate */
6886 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6887 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6888 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6889 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6890 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6891 		7 + /* gfx_v9_0_emit_mem_sync */
6892 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6893 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6894 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6895 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6896 	.emit_fence = gfx_v9_0_ring_emit_fence,
6897 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6898 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6899 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6900 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6901 	.test_ring = gfx_v9_0_ring_test_ring,
6902 	.test_ib = gfx_v9_0_ring_test_ib,
6903 	.insert_nop = amdgpu_ring_insert_nop,
6904 	.pad_ib = amdgpu_ring_generic_pad_ib,
6905 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6906 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6907 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6908 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6909 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6910 };
6911 
6912 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6913 	.type = AMDGPU_RING_TYPE_KIQ,
6914 	.align_mask = 0xff,
6915 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6916 	.support_64bit_ptrs = true,
6917 	.vmhub = AMDGPU_GFXHUB_0,
6918 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6919 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6920 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6921 	.emit_frame_size =
6922 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6923 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6924 		5 + /* hdp invalidate */
6925 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6926 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6927 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6928 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6929 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6930 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6931 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6932 	.test_ring = gfx_v9_0_ring_test_ring,
6933 	.insert_nop = amdgpu_ring_insert_nop,
6934 	.pad_ib = amdgpu_ring_generic_pad_ib,
6935 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6936 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6937 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6938 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6939 };
6940 
6941 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6942 {
6943 	int i;
6944 
6945 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6946 
6947 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6948 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6949 
6950 	if (adev->gfx.num_gfx_rings) {
6951 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6952 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
6953 	}
6954 
6955 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6956 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6957 }
6958 
6959 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6960 	.set = gfx_v9_0_set_eop_interrupt_state,
6961 	.process = gfx_v9_0_eop_irq,
6962 };
6963 
6964 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6965 	.set = gfx_v9_0_set_priv_reg_fault_state,
6966 	.process = gfx_v9_0_priv_reg_irq,
6967 };
6968 
6969 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6970 	.set = gfx_v9_0_set_priv_inst_fault_state,
6971 	.process = gfx_v9_0_priv_inst_irq,
6972 };
6973 
6974 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6975 	.set = gfx_v9_0_set_cp_ecc_error_state,
6976 	.process = amdgpu_gfx_cp_ecc_error_irq,
6977 };
6978 
6979 
6980 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6981 {
6982 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6983 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6984 
6985 	adev->gfx.priv_reg_irq.num_types = 1;
6986 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6987 
6988 	adev->gfx.priv_inst_irq.num_types = 1;
6989 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6990 
6991 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6992 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6993 }
6994 
6995 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6996 {
6997 	switch (adev->ip_versions[GC_HWIP][0]) {
6998 	case IP_VERSION(9, 0, 1):
6999 	case IP_VERSION(9, 2, 1):
7000 	case IP_VERSION(9, 4, 0):
7001 	case IP_VERSION(9, 2, 2):
7002 	case IP_VERSION(9, 1, 0):
7003 	case IP_VERSION(9, 4, 1):
7004 	case IP_VERSION(9, 3, 0):
7005 	case IP_VERSION(9, 4, 2):
7006 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7007 		break;
7008 	default:
7009 		break;
7010 	}
7011 }
7012 
7013 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7014 {
7015 	/* init asic gds info */
7016 	switch (adev->ip_versions[GC_HWIP][0]) {
7017 	case IP_VERSION(9, 0, 1):
7018 	case IP_VERSION(9, 2, 1):
7019 	case IP_VERSION(9, 4, 0):
7020 		adev->gds.gds_size = 0x10000;
7021 		break;
7022 	case IP_VERSION(9, 2, 2):
7023 	case IP_VERSION(9, 1, 0):
7024 	case IP_VERSION(9, 4, 1):
7025 		adev->gds.gds_size = 0x1000;
7026 		break;
7027 	case IP_VERSION(9, 4, 2):
7028 		/* aldebaran removed all the GDS internal memory,
7029 		 * only GWS opcodes such as barrier and semaphore
7030 		 * are supported in the kernel */
7031 		adev->gds.gds_size = 0;
7032 		break;
7033 	default:
7034 		adev->gds.gds_size = 0x10000;
7035 		break;
7036 	}
7037 
7038 	switch (adev->ip_versions[GC_HWIP][0]) {
7039 	case IP_VERSION(9, 0, 1):
7040 	case IP_VERSION(9, 4, 0):
7041 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7042 		break;
7043 	case IP_VERSION(9, 2, 1):
7044 		adev->gds.gds_compute_max_wave_id = 0x27f;
7045 		break;
7046 	case IP_VERSION(9, 2, 2):
7047 	case IP_VERSION(9, 1, 0):
7048 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7049 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7050 		else
7051 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7052 		break;
7053 	case IP_VERSION(9, 4, 1):
7054 		adev->gds.gds_compute_max_wave_id = 0xfff;
7055 		break;
7056 	case IP_VERSION(9, 4, 2):
7057 		/* deprecated for Aldebaran, no usage at all */
7058 		adev->gds.gds_compute_max_wave_id = 0;
7059 		break;
7060 	default:
7061 		/* this really depends on the chip */
7062 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7063 		break;
7064 	}
7065 
7066 	adev->gds.gws_size = 64;
7067 	adev->gds.oa_size = 16;
7068 }
7069 
7070 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7071 						 u32 bitmap)
7072 {
7073 	u32 data;
7074 
7075 	if (!bitmap)
7076 		return;
7077 
7078 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7079 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7080 
7081 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7082 }
7083 
7084 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7085 {
7086 	u32 data, mask;
7087 
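	/* Combine the fused-off (CC_*) and user-disabled (GC_USER_*) CU bits,
	 * then invert against the per-SH CU mask to get the active CUs.
	 */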
7088 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7089 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7090 
7091 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7092 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7093 
7094 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7095 
7096 	return (~data) & mask;
7097 }
7098 
7099 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7100 				 struct amdgpu_cu_info *cu_info)
7101 {
7102 	int i, j, k, counter, active_cu_number = 0;
7103 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7104 	unsigned disable_masks[4 * 4];
7105 
7106 	if (!adev || !cu_info)
7107 		return -EINVAL;
7108 
7109 	/*
7110 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7111 	 */
7112 	if (adev->gfx.config.max_shader_engines *
7113 		adev->gfx.config.max_sh_per_se > 16)
7114 		return -EINVAL;
7115 
7116 	amdgpu_gfx_parse_disable_cu(disable_masks,
7117 				    adev->gfx.config.max_shader_engines,
7118 				    adev->gfx.config.max_sh_per_se);
7119 
7120 	mutex_lock(&adev->grbm_idx_mutex);
7121 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7122 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7123 			mask = 1;
7124 			ao_bitmap = 0;
7125 			counter = 0;
7126 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
7127 			gfx_v9_0_set_user_cu_inactive_bitmap(
7128 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7129 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7130 
7131 			/*
7132 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
7133 			 * 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
7134 			 * layout.
7135 			 * But for Arcturus, the SE/SH layout changed to 8*1.
7136 			 * To minimize the impact, we keep it compatible with the
7137 			 * current bitmap array as below:
7138 			 *    SE4,SH0 --> bitmap[0][1]
7139 			 *    SE5,SH0 --> bitmap[1][1]
7140 			 *    SE6,SH0 --> bitmap[2][1]
7141 			 *    SE7,SH0 --> bitmap[3][1]
7142 			 */
7143 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7144 
7145 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7146 				if (bitmap & mask) {
7147 					if (counter < adev->gfx.config.max_cu_per_sh)
7148 						ao_bitmap |= mask;
7149 					counter++;
7150 				}
7151 				mask <<= 1;
7152 			}
7153 			active_cu_number += counter;
7154 			if (i < 2 && j < 2)
7155 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7156 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7157 		}
7158 	}
7159 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7160 	mutex_unlock(&adev->grbm_idx_mutex);
7161 
7162 	cu_info->number = active_cu_number;
7163 	cu_info->ao_cu_mask = ao_cu_mask;
7164 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7165 
7166 	return 0;
7167 }
7168 
7169 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7170 {
7171 	.type = AMD_IP_BLOCK_TYPE_GFX,
7172 	.major = 9,
7173 	.minor = 0,
7174 	.rev = 0,
7175 	.funcs = &gfx_v9_0_ip_funcs,
7176 };
7177