xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision fd1e77d9)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
144 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
145 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
146 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
147 
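/*
 * GFX RAS sub-block indices as understood by the RAS TA.  The
 * *_INDEX_START/*_INDEX_END entries bracket the range of sub-blocks
 * belonging to each hardware block (CPC, CPF, CPG, GDS, SQ, SQC, ...).
 */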
148 enum ta_ras_gfx_subblock {
149 	/*CPC*/
150 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 	TA_RAS_BLOCK__GFX_CPC_UCODE,
153 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
160 	/* CPF*/
161 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 	TA_RAS_BLOCK__GFX_CPF_TAG,
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
166 	/* CPG*/
167 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 	TA_RAS_BLOCK__GFX_CPG_TAG,
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
172 	/* GDS*/
173 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
180 	/* SPI*/
181 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
182 	/* SQ*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
189 	/* SQC (3 ranges)*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	/* SQC range 0*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	/* SQC range 1*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	/* SQC range 2*/
218 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
232 	/* TA*/
233 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
240 	/* TCA*/
241 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 	/* TCC (5 sub-ranges)*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	/* TCC range 0*/
248 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
258 	/* TCC range 1*/
259 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	/* TCC range 2*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	/* TCC range 3*/
277 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	/* TCC range 4*/
283 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
290 	/* TCI*/
291 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
292 	/* TCP*/
293 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
302 	/* TD*/
303 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 	/* EA (3 sub-ranges)*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	/* EA range 0*/
311 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
321 	/* EA range 1*/
322 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
331 	/* EA range 2*/
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
339 	/* UTC VM L2 bank*/
340 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
341 	/* UTC VM walker*/
342 	TA_RAS_BLOCK__UTC_VML2_WALKER,
343 	/* UTC ATC L2 2MB cache*/
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 	/* UTC ATC L2 4KB cache*/
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
347 	TA_RAS_BLOCK__GFX_MAX
348 };
349 
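/*
 * Driver-side description of one GFX RAS sub-block: its name, the matching
 * ta_ras_gfx_subblock index, and bitmasks of the error types supported in
 * hardware and in software for that sub-block.
 */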
350 struct ras_gfx_subblock {
351 	unsigned char *name;
352 	int ta_subblock;
353 	int hw_supported_error_type;
354 	int sw_supported_error_type;
355 };
356 
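/*
 * Initialize the ras_gfx_subblocks[] entry for @subblock.  Flags a..d are
 * packed into hw_supported_error_type (bits 0..3) and e..h into
 * sw_supported_error_type (g in bit 0, e in bit 1, h in bit 2, f in bit 3).
 */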
357 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
358 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
359 		#subblock,                                                     \
360 		TA_RAS_BLOCK__##subblock,                                      \
361 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
362 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
363 	}
364 
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 			     0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 			     1),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 			     0, 0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 			     0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 			     0, 0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 			     0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 			     0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 			     0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 			     0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514 
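/*
 * "Golden" register settings.  Each entry names a GC register, a mask of the
 * bits to update and the value to program into them; the tables below are
 * applied with soc15_program_register_sequence() from
 * gfx_v9_0_init_golden_registers() according to the GC IP version.
 */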
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538 
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560 
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575 
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603 
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614 
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
616 {
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
636 };
637 
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
639 {
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
652 };
653 
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
655 {
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
659 };
660 
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
662 {
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
679 };
680 
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
682 {
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
696 };
697 
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
699 {
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
711 };
712 
713 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
714 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
715 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
716 };
717 
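/*
 * Offsets of the RLC SRM index control address/data registers, expressed
 * relative to instance 0 so the eight register pairs can be addressed by
 * index.
 */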
718 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
719 {
720 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 };
729 
730 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
731 {
732 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 };
741 
742 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
743 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
744 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
745 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
746 
747 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
748 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
749 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
750 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
751 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
752 				struct amdgpu_cu_info *cu_info);
753 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
754 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
755 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
756 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
757 					  void *ras_error_status);
758 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
759 				     void *inject_if);
760 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
761 
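/*
 * KIQ (Kernel Interface Queue) PM4 helpers.  The functions below build the
 * SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS and INVALIDATE_TLBS
 * packets that the driver submits on the KIQ ring to manage compute queues.
 */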
762 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
763 				uint64_t queue_mask)
764 {
765 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
766 	amdgpu_ring_write(kiq_ring,
767 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
768 		/* vmid_mask: 0, queue_type: 0 (KIQ) */
769 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
770 	amdgpu_ring_write(kiq_ring,
771 			lower_32_bits(queue_mask));	/* queue mask lo */
772 	amdgpu_ring_write(kiq_ring,
773 			upper_32_bits(queue_mask));	/* queue mask hi */
774 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
775 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
776 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
777 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
778 }
779 
780 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
781 				 struct amdgpu_ring *ring)
782 {
783 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
784 	uint64_t wptr_addr = ring->wptr_gpu_addr;
785 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
786 
787 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
788 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
789 	amdgpu_ring_write(kiq_ring,
790 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
791 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
792 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
793 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
794 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
795 			 /* queue_type: normal compute queue */
796 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
797 			 /* alloc format: all_on_one_pipe */
798 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
799 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
800 			 /* num_queues: must be 1 */
801 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
802 	amdgpu_ring_write(kiq_ring,
803 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
804 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
805 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
806 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
807 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
808 }
809 
810 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
811 				   struct amdgpu_ring *ring,
812 				   enum amdgpu_unmap_queues_action action,
813 				   u64 gpu_addr, u64 seq)
814 {
815 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
816 
817 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
818 	amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
819 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
820 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
821 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
822 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
823 	amdgpu_ring_write(kiq_ring,
824 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
825 
826 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
827 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
828 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
829 		amdgpu_ring_write(kiq_ring, seq);
830 	} else {
831 		amdgpu_ring_write(kiq_ring, 0);
832 		amdgpu_ring_write(kiq_ring, 0);
833 		amdgpu_ring_write(kiq_ring, 0);
834 	}
835 }
836 
837 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
838 				   struct amdgpu_ring *ring,
839 				   u64 addr,
840 				   u64 seq)
841 {
842 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
843 
844 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
845 	amdgpu_ring_write(kiq_ring,
846 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
847 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
848 			  PACKET3_QUERY_STATUS_COMMAND(2));
849 	/* doorbell offset, eng_sel */
850 	amdgpu_ring_write(kiq_ring,
851 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
852 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
853 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
854 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
855 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
856 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
857 }
858 
859 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
860 				uint16_t pasid, uint32_t flush_type,
861 				bool all_hub)
862 {
863 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
864 	amdgpu_ring_write(kiq_ring,
865 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
866 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
867 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
868 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
869 }
870 
871 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
872 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
873 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
874 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
875 	.kiq_query_status = gfx_v9_0_kiq_query_status,
876 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
877 	.set_resources_size = 8,
878 	.map_queues_size = 7,
879 	.unmap_queues_size = 6,
880 	.query_status_size = 7,
881 	.invalidate_tlbs_size = 2,
882 };
883 
884 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
885 {
886 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
887 }
888 
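/* Program the per-ASIC golden register sequences for the detected GC IP version. */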
889 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
890 {
891 	switch (adev->ip_versions[GC_HWIP][0]) {
892 	case IP_VERSION(9, 0, 1):
893 		soc15_program_register_sequence(adev,
894 						golden_settings_gc_9_0,
895 						ARRAY_SIZE(golden_settings_gc_9_0));
896 		soc15_program_register_sequence(adev,
897 						golden_settings_gc_9_0_vg10,
898 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
899 		break;
900 	case IP_VERSION(9, 2, 1):
901 		soc15_program_register_sequence(adev,
902 						golden_settings_gc_9_2_1,
903 						ARRAY_SIZE(golden_settings_gc_9_2_1));
904 		soc15_program_register_sequence(adev,
905 						golden_settings_gc_9_2_1_vg12,
906 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
907 		break;
908 	case IP_VERSION(9, 4, 0):
909 		soc15_program_register_sequence(adev,
910 						golden_settings_gc_9_0,
911 						ARRAY_SIZE(golden_settings_gc_9_0));
912 		soc15_program_register_sequence(adev,
913 						golden_settings_gc_9_0_vg20,
914 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
915 		break;
916 	case IP_VERSION(9, 4, 1):
917 		soc15_program_register_sequence(adev,
918 						golden_settings_gc_9_4_1_arct,
919 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
920 		break;
921 	case IP_VERSION(9, 2, 2):
922 	case IP_VERSION(9, 1, 0):
923 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
924 						ARRAY_SIZE(golden_settings_gc_9_1));
925 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
926 			soc15_program_register_sequence(adev,
927 							golden_settings_gc_9_1_rv2,
928 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
929 		else
930 			soc15_program_register_sequence(adev,
931 							golden_settings_gc_9_1_rv1,
932 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
933 		break;
934 	case IP_VERSION(9, 3, 0):
935 		soc15_program_register_sequence(adev,
936 						golden_settings_gc_9_1_rn,
937 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
938 		return; /* Renoir does not need the common golden settings */
939 	case IP_VERSION(9, 4, 2):
940 		gfx_v9_4_2_init_golden_registers(adev,
941 						 adev->smuio.funcs->get_die_id(adev));
942 		break;
943 	default:
944 		break;
945 	}
946 
947 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
948 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
949 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
950 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
951 }
952 
953 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
954 {
955 	adev->gfx.scratch.num_reg = 8;
956 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
957 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
958 }
959 
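/*
 * Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * asking the selected engine for a write confirmation.
 */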
960 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
961 				       bool wc, uint32_t reg, uint32_t val)
962 {
963 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
964 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
965 				WRITE_DATA_DST_SEL(0) |
966 				(wc ? WR_CONFIRM : 0));
967 	amdgpu_ring_write(ring, reg);
968 	amdgpu_ring_write(ring, 0);
969 	amdgpu_ring_write(ring, val);
970 }
971 
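/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & mask) == ref, re-checking
 * at the given poll interval.
 */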
972 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
973 				  int mem_space, int opt, uint32_t addr0,
974 				  uint32_t addr1, uint32_t ref, uint32_t mask,
975 				  uint32_t inv)
976 {
977 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
978 	amdgpu_ring_write(ring,
979 				 /* memory (1) or register (0) */
980 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
981 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
982 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
983 				 WAIT_REG_MEM_ENGINE(eng_sel)));
984 
985 	if (mem_space)
986 		BUG_ON(addr0 & 0x3); /* Dword align */
987 	amdgpu_ring_write(ring, addr0);
988 	amdgpu_ring_write(ring, addr1);
989 	amdgpu_ring_write(ring, ref);
990 	amdgpu_ring_write(ring, mask);
991 	amdgpu_ring_write(ring, inv); /* poll interval */
992 }
993 
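/*
 * Basic ring test: push a write of 0xDEADBEEF to a scratch register through
 * the ring and poll until it shows up, proving that the CP is fetching and
 * executing packets.
 */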
994 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
995 {
996 	struct amdgpu_device *adev = ring->adev;
997 	uint32_t scratch;
998 	uint32_t tmp = 0;
999 	unsigned i;
1000 	int r;
1001 
1002 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1003 	if (r)
1004 		return r;
1005 
1006 	WREG32(scratch, 0xCAFEDEAD);
1007 	r = amdgpu_ring_alloc(ring, 3);
1008 	if (r)
1009 		goto error_free_scratch;
1010 
1011 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1012 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1013 	amdgpu_ring_write(ring, 0xDEADBEEF);
1014 	amdgpu_ring_commit(ring);
1015 
1016 	for (i = 0; i < adev->usec_timeout; i++) {
1017 		tmp = RREG32(scratch);
1018 		if (tmp == 0xDEADBEEF)
1019 			break;
1020 		udelay(1);
1021 	}
1022 
1023 	if (i >= adev->usec_timeout)
1024 		r = -ETIMEDOUT;
1025 
1026 error_free_scratch:
1027 	amdgpu_gfx_scratch_free(adev, scratch);
1028 	return r;
1029 }
1030 
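/*
 * Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * write-back slot in system memory, wait for its fence and check the value
 * to confirm end-to-end IB execution.
 */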
1031 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1032 {
1033 	struct amdgpu_device *adev = ring->adev;
1034 	struct amdgpu_ib ib;
1035 	struct dma_fence *f = NULL;
1036 
1037 	unsigned index;
1038 	uint64_t gpu_addr;
1039 	uint32_t tmp;
1040 	long r;
1041 
1042 	r = amdgpu_device_wb_get(adev, &index);
1043 	if (r)
1044 		return r;
1045 
1046 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1047 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1048 	memset(&ib, 0, sizeof(ib));
1049 	r = amdgpu_ib_get(adev, NULL, 16,
1050 					AMDGPU_IB_POOL_DIRECT, &ib);
1051 	if (r)
1052 		goto err1;
1053 
1054 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1055 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1056 	ib.ptr[2] = lower_32_bits(gpu_addr);
1057 	ib.ptr[3] = upper_32_bits(gpu_addr);
1058 	ib.ptr[4] = 0xDEADBEEF;
1059 	ib.length_dw = 5;
1060 
1061 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1062 	if (r)
1063 		goto err2;
1064 
1065 	r = dma_fence_wait_timeout(f, false, timeout);
1066 	if (r == 0) {
1067 		r = -ETIMEDOUT;
1068 		goto err2;
1069 	} else if (r < 0) {
1070 		goto err2;
1071 	}
1072 
1073 	tmp = adev->wb.wb[index];
1074 	if (tmp == 0xDEADBEEF)
1075 		r = 0;
1076 	else
1077 		r = -EINVAL;
1078 
1079 err2:
1080 	amdgpu_ib_free(adev, &ib, NULL);
1081 	dma_fence_put(f);
1082 err1:
1083 	amdgpu_device_wb_free(adev, index);
1084 	return r;
1085 }
1086 
1087 
1088 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1089 {
1090 	release_firmware(adev->gfx.pfp_fw);
1091 	adev->gfx.pfp_fw = NULL;
1092 	release_firmware(adev->gfx.me_fw);
1093 	adev->gfx.me_fw = NULL;
1094 	release_firmware(adev->gfx.ce_fw);
1095 	adev->gfx.ce_fw = NULL;
1096 	release_firmware(adev->gfx.rlc_fw);
1097 	adev->gfx.rlc_fw = NULL;
1098 	release_firmware(adev->gfx.mec_fw);
1099 	adev->gfx.mec_fw = NULL;
1100 	release_firmware(adev->gfx.mec2_fw);
1101 	adev->gfx.mec2_fw = NULL;
1102 
1103 	kfree(adev->gfx.rlc.register_list_format);
1104 }
1105 
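/*
 * Parse the v2.1 RLC firmware header and record the versions, sizes and
 * offsets of the save/restore lists (CNTL, GPM, SRM) it carries.
 */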
1106 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1107 {
1108 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1109 
1110 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1111 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1112 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1113 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1114 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1115 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1116 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1117 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1118 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1119 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1120 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1121 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1122 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1123 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1124 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1125 }
1126 
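/*
 * Record whether the installed ME/MEC firmware is new enough to support the
 * combined register write-and-wait operation, and warn once if the CP
 * firmware is too old.
 */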
1127 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1128 {
1129 	adev->gfx.me_fw_write_wait = false;
1130 	adev->gfx.mec_fw_write_wait = false;
1131 
1132 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1133 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1134 	    (adev->gfx.mec_feature_version < 46) ||
1135 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1136 	    (adev->gfx.pfp_feature_version < 46)))
1137 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1138 
1139 	switch (adev->ip_versions[GC_HWIP][0]) {
1140 	case IP_VERSION(9, 0, 1):
1141 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1142 		    (adev->gfx.me_feature_version >= 42) &&
1143 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1144 		    (adev->gfx.pfp_feature_version >= 42))
1145 			adev->gfx.me_fw_write_wait = true;
1146 
1147 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1148 		    (adev->gfx.mec_feature_version >= 42))
1149 			adev->gfx.mec_fw_write_wait = true;
1150 		break;
1151 	case IP_VERSION(9, 2, 1):
1152 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1153 		    (adev->gfx.me_feature_version >= 44) &&
1154 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1155 		    (adev->gfx.pfp_feature_version >= 44))
1156 			adev->gfx.me_fw_write_wait = true;
1157 
1158 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1159 		    (adev->gfx.mec_feature_version >= 44))
1160 			adev->gfx.mec_fw_write_wait = true;
1161 		break;
1162 	case IP_VERSION(9, 4, 0):
1163 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1164 		    (adev->gfx.me_feature_version >= 44) &&
1165 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1166 		    (adev->gfx.pfp_feature_version >= 44))
1167 			adev->gfx.me_fw_write_wait = true;
1168 
1169 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1170 		    (adev->gfx.mec_feature_version >= 44))
1171 			adev->gfx.mec_fw_write_wait = true;
1172 		break;
1173 	case IP_VERSION(9, 1, 0):
1174 	case IP_VERSION(9, 2, 2):
1175 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1176 		    (adev->gfx.me_feature_version >= 42) &&
1177 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1178 		    (adev->gfx.pfp_feature_version >= 42))
1179 			adev->gfx.me_fw_write_wait = true;
1180 
1181 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1182 		    (adev->gfx.mec_feature_version >= 42))
1183 			adev->gfx.mec_fw_write_wait = true;
1184 		break;
1185 	default:
1186 		adev->gfx.me_fw_write_wait = true;
1187 		adev->gfx.mec_fw_write_wait = true;
1188 		break;
1189 	}
1190 }
1191 
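/*
 * PCI vendor/device/subsystem/revision tuples of boards on which GFXOFF is
 * known to misbehave; matching devices get PP_GFXOFF_MASK cleared in
 * gfx_v9_0_check_if_need_gfxoff().
 */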
1192 struct amdgpu_gfxoff_quirk {
1193 	u16 chip_vendor;
1194 	u16 chip_device;
1195 	u16 subsys_vendor;
1196 	u16 subsys_device;
1197 	u8 revision;
1198 };
1199 
1200 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1201 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1202 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1203 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1204 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1205 	/* GFXOFF is unstable on C6 parts with VBIOS 113-RAVEN-114 */
1206 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1207 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1208 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1209 	{ 0, 0, 0, 0, 0 },
1210 };
1211 
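/* Return true when the PCI IDs and revision match an entry in the GFXOFF quirk list above. */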
1212 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1213 {
1214 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1215 
1216 	while (p && p->chip_device != 0) {
1217 		if (pdev->vendor == p->chip_vendor &&
1218 		    pdev->device == p->chip_device &&
1219 		    pdev->subsystem_vendor == p->subsys_vendor &&
1220 		    pdev->subsystem_device == p->subsys_device &&
1221 		    pdev->revision == p->revision) {
1222 			return true;
1223 		}
1224 		++p;
1225 	}
1226 	return false;
1227 }
1228 
1229 static bool is_raven_kicker(struct amdgpu_device *adev)
1230 {
1231 	if (adev->pm.fw_version >= 0x41e2b)
1232 		return true;
1233 	else
1234 		return false;
1235 }
1236 
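/* GC 9.3.0 with ME firmware 0xa5 / feature version 52 or newer expects an enlarged CP doorbell range. */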
1237 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1238 {
1239 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1240 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1241 	    (adev->gfx.me_feature_version >= 52))
1242 		return true;
1243 	else
1244 		return false;
1245 }
1246 
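/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware
 * is too old to support it; where GFXOFF stays enabled, turn on the GFX
 * powergating flags it relies on.
 */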
1247 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1248 {
1249 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1250 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1251 
1252 	switch (adev->ip_versions[GC_HWIP][0]) {
1253 	case IP_VERSION(9, 0, 1):
1254 	case IP_VERSION(9, 2, 1):
1255 	case IP_VERSION(9, 4, 0):
1256 		break;
1257 	case IP_VERSION(9, 2, 2):
1258 	case IP_VERSION(9, 1, 0):
1259 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1260 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1261 		    ((!is_raven_kicker(adev) &&
1262 		      adev->gfx.rlc_fw_version < 531) ||
1263 		     (adev->gfx.rlc_feature_version < 1) ||
1264 		     !adev->gfx.rlc.is_rlc_v2_1))
1265 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1266 
1267 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1268 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1269 				AMD_PG_SUPPORT_CP |
1270 				AMD_PG_SUPPORT_RLC_SMU_HS;
1271 		break;
1272 	case IP_VERSION(9, 3, 0):
1273 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1274 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1275 				AMD_PG_SUPPORT_CP |
1276 				AMD_PG_SUPPORT_RLC_SMU_HS;
1277 		break;
1278 	default:
1279 		break;
1280 	}
1281 }
1282 
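/*
 * Fetch and validate the PFP, ME and CE microcode for the given chip and,
 * for PSP-based loading, record each image in the ucode table so its size
 * is accounted for.
 */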
1283 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1284 					  const char *chip_name)
1285 {
1286 	char fw_name[30];
1287 	int err;
1288 	struct amdgpu_firmware_info *info = NULL;
1289 	const struct common_firmware_header *header = NULL;
1290 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1291 
1292 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1293 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1294 	if (err)
1295 		goto out;
1296 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1297 	if (err)
1298 		goto out;
1299 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1300 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1301 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1302 
1303 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1304 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1305 	if (err)
1306 		goto out;
1307 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1308 	if (err)
1309 		goto out;
1310 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1311 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1312 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1313 
1314 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1315 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1316 	if (err)
1317 		goto out;
1318 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1319 	if (err)
1320 		goto out;
1321 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1322 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1323 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1324 
1325 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1326 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1327 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1328 		info->fw = adev->gfx.pfp_fw;
1329 		header = (const struct common_firmware_header *)info->fw->data;
1330 		adev->firmware.fw_size +=
1331 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1332 
1333 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1334 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1335 		info->fw = adev->gfx.me_fw;
1336 		header = (const struct common_firmware_header *)info->fw->data;
1337 		adev->firmware.fw_size +=
1338 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1339 
1340 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1341 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1342 		info->fw = adev->gfx.ce_fw;
1343 		header = (const struct common_firmware_header *)info->fw->data;
1344 		adev->firmware.fw_size +=
1345 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1346 	}
1347 
1348 out:
1349 	if (err) {
1350 		dev_err(adev->dev,
1351 			"gfx9: Failed to load firmware \"%s\"\n",
1352 			fw_name);
1353 		release_firmware(adev->gfx.pfp_fw);
1354 		adev->gfx.pfp_fw = NULL;
1355 		release_firmware(adev->gfx.me_fw);
1356 		adev->gfx.me_fw = NULL;
1357 		release_firmware(adev->gfx.ce_fw);
1358 		adev->gfx.ce_fw = NULL;
1359 	}
1360 	return err;
1361 }
1362 
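/*
 * Fetch and validate the RLC microcode (picking the AM4 or kicker image
 * where required), parse the save/restore list data out of the header and
 * register the RLC images for PSP-based loading.
 */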
1363 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1364 					  const char *chip_name)
1365 {
1366 	char fw_name[30];
1367 	int err;
1368 	struct amdgpu_firmware_info *info = NULL;
1369 	const struct common_firmware_header *header = NULL;
1370 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1371 	unsigned int *tmp = NULL;
1372 	unsigned int i = 0;
1373 	uint16_t version_major;
1374 	uint16_t version_minor;
1375 	uint32_t smu_version;
1376 
1377 	/*
1378 	 * For Picasso on AM4 socket boards, we use picasso_rlc_am4.bin
1379 	 * instead of picasso_rlc.bin.
1380 	 * How to tell them apart:
1381 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1382 	 *          or revision >= 0xD8 && revision <= 0xDF
1383 	 * otherwise it is PCO FP5
1384 	 */
1385 	if (!strcmp(chip_name, "picasso") &&
1386 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1387 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1388 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1389 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1390 		(smu_version >= 0x41e2b))
1391 		/*
1392 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1393 		 */
1394 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1395 	else
1396 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1397 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1398 	if (err)
1399 		goto out;
1400 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1401 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1402 
1403 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1404 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1405 	if (version_major == 2 && version_minor == 1)
1406 		adev->gfx.rlc.is_rlc_v2_1 = true;
1407 
1408 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1409 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1410 	adev->gfx.rlc.save_and_restore_offset =
1411 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1412 	adev->gfx.rlc.clear_state_descriptor_offset =
1413 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1414 	adev->gfx.rlc.avail_scratch_ram_locations =
1415 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1416 	adev->gfx.rlc.reg_restore_list_size =
1417 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1418 	adev->gfx.rlc.reg_list_format_start =
1419 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1420 	adev->gfx.rlc.reg_list_format_separate_start =
1421 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1422 	adev->gfx.rlc.starting_offsets_start =
1423 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1424 	adev->gfx.rlc.reg_list_format_size_bytes =
1425 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1426 	adev->gfx.rlc.reg_list_size_bytes =
1427 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1428 	adev->gfx.rlc.register_list_format =
1429 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1430 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1431 	if (!adev->gfx.rlc.register_list_format) {
1432 		err = -ENOMEM;
1433 		goto out;
1434 	}
1435 
1436 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1437 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1438 	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1439 		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1440 
1441 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1442 
1443 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1444 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1445 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1446 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1447 
1448 	if (adev->gfx.rlc.is_rlc_v2_1)
1449 		gfx_v9_0_init_rlc_ext_microcode(adev);
1450 
1451 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1452 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1453 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1454 		info->fw = adev->gfx.rlc_fw;
1455 		header = (const struct common_firmware_header *)info->fw->data;
1456 		adev->firmware.fw_size +=
1457 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1458 
1459 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1460 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1461 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1462 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1463 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1464 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1465 			info->fw = adev->gfx.rlc_fw;
1466 			adev->firmware.fw_size +=
1467 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1468 
1469 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1470 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1471 			info->fw = adev->gfx.rlc_fw;
1472 			adev->firmware.fw_size +=
1473 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1474 
1475 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1476 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1477 			info->fw = adev->gfx.rlc_fw;
1478 			adev->firmware.fw_size +=
1479 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1480 		}
1481 	}
1482 
1483 out:
1484 	if (err) {
1485 		dev_err(adev->dev,
1486 			"gfx9: Failed to load firmware \"%s\"\n",
1487 			fw_name);
1488 		release_firmware(adev->gfx.rlc_fw);
1489 		adev->gfx.rlc_fw = NULL;
1490 	}
1491 	return err;
1492 }
1493 
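/*
 * Arcturus, Renoir/Green Sardine and Aldebaran reuse the MEC1 image for
 * MEC2, so no separate mec2 binary is loaded for them.
 */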
1494 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1495 {
1496 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1497 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1498 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1499 		return false;
1500 
1501 	return true;
1502 }
1503 
1504 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1505 					  const char *chip_name)
1506 {
1507 	char fw_name[30];
1508 	int err;
1509 	struct amdgpu_firmware_info *info = NULL;
1510 	const struct common_firmware_header *header = NULL;
1511 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1512 
1513 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1514 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1515 	if (err)
1516 		goto out;
1517 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1518 	if (err)
1519 		goto out;
1520 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1521 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1522 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1523 
1525 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1526 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1527 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1528 		if (!err) {
1529 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1530 			if (err)
1531 				goto out;
1532 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1533 					adev->gfx.mec2_fw->data;
1534 			adev->gfx.mec2_fw_version =
1535 					le32_to_cpu(cp_hdr->header.ucode_version);
1536 			adev->gfx.mec2_feature_version =
1537 					le32_to_cpu(cp_hdr->ucode_feature_version);
1538 		} else {
1539 			err = 0;
1540 			adev->gfx.mec2_fw = NULL;
1541 		}
1542 	} else {
1543 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1544 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1545 	}
1546 
1547 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1548 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1549 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1550 		info->fw = adev->gfx.mec_fw;
1551 		header = (const struct common_firmware_header *)info->fw->data;
1552 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1553 		adev->firmware.fw_size +=
1554 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1555 
1556 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1557 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1558 		info->fw = adev->gfx.mec_fw;
1559 		adev->firmware.fw_size +=
1560 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1561 
1562 		if (adev->gfx.mec2_fw) {
1563 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1564 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1565 			info->fw = adev->gfx.mec2_fw;
1566 			header = (const struct common_firmware_header *)info->fw->data;
1567 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1568 			adev->firmware.fw_size +=
1569 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1570 
1571 			/* TODO: Determine if MEC2 JT FW loading can be removed
1572 				 for all GFX v9 ASICs and above */
1573 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1574 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1575 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1576 				info->fw = adev->gfx.mec2_fw;
1577 				adev->firmware.fw_size +=
1578 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1579 					PAGE_SIZE);
1580 			}
1581 		}
1582 	}
1583 
1584 out:
1585 	gfx_v9_0_check_if_need_gfxoff(adev);
1586 	gfx_v9_0_check_fw_write_wait(adev);
1587 	if (err) {
1588 		dev_err(adev->dev,
1589 			"gfx9: Failed to load firmware \"%s\"\n",
1590 			fw_name);
1591 		release_firmware(adev->gfx.mec_fw);
1592 		adev->gfx.mec_fw = NULL;
1593 		release_firmware(adev->gfx.mec2_fw);
1594 		adev->gfx.mec2_fw = NULL;
1595 	}
1596 	return err;
1597 }
1598 
1599 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1600 {
1601 	const char *chip_name;
1602 	int r;
1603 
1604 	DRM_DEBUG("\n");
1605 
1606 	switch (adev->ip_versions[GC_HWIP][0]) {
1607 	case IP_VERSION(9, 0, 1):
1608 		chip_name = "vega10";
1609 		break;
1610 	case IP_VERSION(9, 2, 1):
1611 		chip_name = "vega12";
1612 		break;
1613 	case IP_VERSION(9, 4, 0):
1614 		chip_name = "vega20";
1615 		break;
1616 	case IP_VERSION(9, 2, 2):
1617 	case IP_VERSION(9, 1, 0):
1618 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1619 			chip_name = "raven2";
1620 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1621 			chip_name = "picasso";
1622 		else
1623 			chip_name = "raven";
1624 		break;
1625 	case IP_VERSION(9, 4, 1):
1626 		chip_name = "arcturus";
1627 		break;
1628 	case IP_VERSION(9, 3, 0):
1629 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1630 			chip_name = "renoir";
1631 		else
1632 			chip_name = "green_sardine";
1633 		break;
1634 	case IP_VERSION(9, 4, 2):
1635 		chip_name = "aldebaran";
1636 		break;
1637 	default:
1638 		BUG();
1639 	}
1640 
1641 	/* No CPG in Arcturus */
1642 	if (adev->gfx.num_gfx_rings) {
1643 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1644 		if (r)
1645 			return r;
1646 	}
1647 
1648 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1649 	if (r)
1650 		return r;
1651 
1652 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1653 	if (r)
1654 		return r;
1655 
1656 	return r;
1657 }
1658 
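/*
 * Clear-state buffer size in dwords: the PREAMBLE begin/end pairs, the
 * CONTEXT_CONTROL packet, one SET_CONTEXT_REG header plus payload per
 * extent, and the trailing CLEAR_STATE packet.
 */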
1659 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1660 {
1661 	u32 count = 0;
1662 	const struct cs_section_def *sect = NULL;
1663 	const struct cs_extent_def *ext = NULL;
1664 
1665 	/* begin clear state */
1666 	count += 2;
1667 	/* context control state */
1668 	count += 3;
1669 
1670 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1671 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1672 			if (sect->id == SECT_CONTEXT)
1673 				count += 2 + ext->reg_count;
1674 			else
1675 				return 0;
1676 		}
1677 	}
1678 
1679 	/* end clear state */
1680 	count += 2;
1681 	/* clear state */
1682 	count += 2;
1683 
1684 	return count;
1685 }
1686 
1687 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1688 				    volatile u32 *buffer)
1689 {
1690 	u32 count = 0, i;
1691 	const struct cs_section_def *sect = NULL;
1692 	const struct cs_extent_def *ext = NULL;
1693 
1694 	if (adev->gfx.rlc.cs_data == NULL)
1695 		return;
1696 	if (buffer == NULL)
1697 		return;
1698 
1699 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1700 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1701 
1702 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1703 	buffer[count++] = cpu_to_le32(0x80000000);
1704 	buffer[count++] = cpu_to_le32(0x80000000);
1705 
1706 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1707 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1708 			if (sect->id == SECT_CONTEXT) {
1709 				buffer[count++] =
1710 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1711 				buffer[count++] = cpu_to_le32(ext->reg_index -
1712 						PACKET3_SET_CONTEXT_REG_START);
1713 				for (i = 0; i < ext->reg_count; i++)
1714 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1715 			} else {
1716 				return;
1717 			}
1718 		}
1719 	}
1720 
1721 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1722 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1723 
1724 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1725 	buffer[count++] = cpu_to_le32(0);
1726 }
1727 
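/*
 * Build the per-SE/SH always-on CU bitmaps used by the RLC: APUs keep
 * 4 CUs always on, Vega12 keeps 8 and other parts 12; the first two CUs
 * are additionally written to the powergating always-on mask.
 */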
1728 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1729 {
1730 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1731 	uint32_t pg_always_on_cu_num = 2;
1732 	uint32_t always_on_cu_num;
1733 	uint32_t i, j, k;
1734 	uint32_t mask, cu_bitmap, counter;
1735 
1736 	if (adev->flags & AMD_IS_APU)
1737 		always_on_cu_num = 4;
1738 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1739 		always_on_cu_num = 8;
1740 	else
1741 		always_on_cu_num = 12;
1742 
1743 	mutex_lock(&adev->grbm_idx_mutex);
1744 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1745 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1746 			mask = 1;
1747 			cu_bitmap = 0;
1748 			counter = 0;
1749 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1750 
1751 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1752 				if (cu_info->bitmap[i][j] & mask) {
1753 					if (counter == pg_always_on_cu_num)
1754 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1755 					if (counter < always_on_cu_num)
1756 						cu_bitmap |= mask;
1757 					else
1758 						break;
1759 					counter++;
1760 				}
1761 				mask <<= 1;
1762 			}
1763 
1764 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1765 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1766 		}
1767 	}
1768 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1769 	mutex_unlock(&adev->grbm_idx_mutex);
1770 }
1771 
1772 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1773 {
1774 	uint32_t data;
1775 
1776 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1777 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1778 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1779 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1780 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1781 
1782 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1783 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1784 
1785 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1786 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1787 
1788 	mutex_lock(&adev->grbm_idx_mutex);
1789 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1790 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1791 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1792 
1793 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1794 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1795 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1796 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1797 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1798 
1799 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1800 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1801 	data &= 0x0000FFFF;
1802 	data |= 0x00C00000;
1803 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1804 
1805 	/*
1806 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1807 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1808 	 */
1809 
1810 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1811 	 * but used for RLC_LB_CNTL configuration */
1812 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1813 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1814 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1815 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1816 	mutex_unlock(&adev->grbm_idx_mutex);
1817 
1818 	gfx_v9_0_init_always_on_cu_mask(adev);
1819 }
1820 
1821 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1822 {
1823 	uint32_t data;
1824 
1825 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1826 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1827 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1828 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1829 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1830 
1831 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1832 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1833 
1834 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1835 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1836 
1837 	mutex_lock(&adev->grbm_idx_mutex);
1838 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1839 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1840 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1841 
1842 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1843 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1844 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1845 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1846 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1847 
1848 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1849 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1850 	data &= 0x0000FFFF;
1851 	data |= 0x00C00000;
1852 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1853 
1854 	/*
1855 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1856 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1857 	 */
1858 
1859 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1860 	 * but used for RLC_LB_CNTL configuration */
1861 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1862 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1863 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1864 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1865 	mutex_unlock(&adev->grbm_idx_mutex);
1866 
1867 	gfx_v9_0_init_always_on_cu_mask(adev);
1868 }
1869 
1870 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1871 {
1872 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1873 }
1874 
1875 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1876 {
1877 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1878 		return 5;
1879 	else
1880 		return 4;
1881 }
1882 
1883 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1884 {
1885 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1886 
1887 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1888 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1889 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1890 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1891 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1892 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1893 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1894 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1895 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1896 }
1897 
1898 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1899 {
1900 	const struct cs_section_def *cs_data;
1901 	int r;
1902 
1903 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1904 
1905 	cs_data = adev->gfx.rlc.cs_data;
1906 
1907 	if (cs_data) {
1908 		/* init clear state block */
1909 		r = amdgpu_gfx_rlc_init_csb(adev);
1910 		if (r)
1911 			return r;
1912 	}
1913 
1914 	if (adev->flags & AMD_IS_APU) {
1915 		/* TODO: double check the cp_table_size for RV */
1916 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1917 		r = amdgpu_gfx_rlc_init_cpt(adev);
1918 		if (r)
1919 			return r;
1920 	}
1921 
1922 	switch (adev->ip_versions[GC_HWIP][0]) {
1923 	case IP_VERSION(9, 2, 2):
1924 	case IP_VERSION(9, 1, 0):
1925 		gfx_v9_0_init_lbpw(adev);
1926 		break;
1927 	case IP_VERSION(9, 4, 0):
1928 		gfx_v9_4_init_lbpw(adev);
1929 		break;
1930 	default:
1931 		break;
1932 	}
1933 
1934 	/* init spm vmid with 0xf */
1935 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1936 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1937 
1938 	return 0;
1939 }
1940 
1941 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1942 {
1943 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1944 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1945 }
1946 
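/*
 * Allocate the compute queues' HPD/EOP buffer in VRAM and copy the MEC
 * microcode into a GTT buffer object for the CP to fetch from.
 */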
1947 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1948 {
1949 	int r;
1950 	u32 *hpd;
1951 	const __le32 *fw_data;
1952 	unsigned fw_size;
1953 	u32 *fw;
1954 	size_t mec_hpd_size;
1955 
1956 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1957 
1958 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1959 
1960 	/* take ownership of the relevant compute queues */
1961 	amdgpu_gfx_compute_queue_acquire(adev);
1962 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1963 	if (mec_hpd_size) {
1964 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1965 					      AMDGPU_GEM_DOMAIN_VRAM,
1966 					      &adev->gfx.mec.hpd_eop_obj,
1967 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1968 					      (void **)&hpd);
1969 		if (r) {
1970 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1971 			gfx_v9_0_mec_fini(adev);
1972 			return r;
1973 		}
1974 
1975 		memset(hpd, 0, mec_hpd_size);
1976 
1977 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1978 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1979 	}
1980 
1981 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1982 
1983 	fw_data = (const __le32 *)
1984 		(adev->gfx.mec_fw->data +
1985 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1986 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1987 
1988 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1989 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1990 				      &adev->gfx.mec.mec_fw_obj,
1991 				      &adev->gfx.mec.mec_fw_gpu_addr,
1992 				      (void **)&fw);
1993 	if (r) {
1994 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1995 		gfx_v9_0_mec_fini(adev);
1996 		return r;
1997 	}
1998 
1999 	memcpy(fw, fw_data, fw_size);
2000 
2001 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2002 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2003 
2004 	return 0;
2005 }
2006 
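/* Read a single wave-indexed register through SQ_IND_INDEX/SQ_IND_DATA. */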
2007 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2008 {
2009 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2010 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2011 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2012 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2013 		(SQ_IND_INDEX__FORCE_READ_MASK));
2014 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2015 }
2016 
2017 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2018 			   uint32_t wave, uint32_t thread,
2019 			   uint32_t regno, uint32_t num, uint32_t *out)
2020 {
2021 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2022 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2023 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2024 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2025 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2026 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2027 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2028 	while (num--)
2029 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2030 }
2031 
2032 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2033 {
2034 	/* type 1 wave data */
2035 	dst[(*no_fields)++] = 1;
2036 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2037 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2038 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2039 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2040 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2041 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2042 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2043 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2044 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2045 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2046 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2047 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2048 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2049 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2050 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2051 }
2052 
2053 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2054 				     uint32_t wave, uint32_t start,
2055 				     uint32_t size, uint32_t *dst)
2056 {
2057 	wave_read_regs(
2058 		adev, simd, wave, 0,
2059 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2060 }
2061 
2062 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2063 				     uint32_t wave, uint32_t thread,
2064 				     uint32_t start, uint32_t size,
2065 				     uint32_t *dst)
2066 {
2067 	wave_read_regs(
2068 		adev, simd, wave, thread,
2069 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2070 }
2071 
2072 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2073 				  u32 me, u32 pipe, u32 q, u32 vm)
2074 {
2075 	soc15_grbm_select(adev, me, pipe, q, vm);
2076 }
2077 
2078 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2079         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2080         .select_se_sh = &gfx_v9_0_select_se_sh,
2081         .read_wave_data = &gfx_v9_0_read_wave_data,
2082         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2083         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2084         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2085 };
2086 
2087 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2088 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2089 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2090 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2091 };
2092 
2093 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2094 	.ras_block = {
2095 		.hw_ops = &gfx_v9_0_ras_ops,
2096 	},
2097 };
2098 
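/*
 * Set up the per-ASIC gfx configuration: FIFO sizes and GB_ADDR_CONFIG
 * (golden value or read back from hardware), register the RAS block where
 * one exists, and decode the GB_ADDR_CONFIG fields used elsewhere.
 */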
2099 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2100 {
2101 	u32 gb_addr_config;
2102 	int err;
2103 
2104 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2105 
2106 	switch (adev->ip_versions[GC_HWIP][0]) {
2107 	case IP_VERSION(9, 0, 1):
2108 		adev->gfx.config.max_hw_contexts = 8;
2109 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2110 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2111 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2112 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2113 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2114 		break;
2115 	case IP_VERSION(9, 2, 1):
2116 		adev->gfx.config.max_hw_contexts = 8;
2117 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2118 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2119 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2120 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2121 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2122 		DRM_INFO("fix gfx.config for vega12\n");
2123 		break;
2124 	case IP_VERSION(9, 4, 0):
2125 		adev->gfx.ras = &gfx_v9_0_ras;
2126 		adev->gfx.config.max_hw_contexts = 8;
2127 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2128 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2129 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2130 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2131 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2132 		gb_addr_config &= ~0xf3e777ff;
2133 		gb_addr_config |= 0x22014042;
2134 		/* check vbios table if gpu info is not available */
2135 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2136 		if (err)
2137 			return err;
2138 		break;
2139 	case IP_VERSION(9, 2, 2):
2140 	case IP_VERSION(9, 1, 0):
2141 		adev->gfx.config.max_hw_contexts = 8;
2142 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2143 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2144 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2145 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2146 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2147 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2148 		else
2149 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2150 		break;
2151 	case IP_VERSION(9, 4, 1):
2152 		adev->gfx.ras = &gfx_v9_4_ras;
2153 		adev->gfx.config.max_hw_contexts = 8;
2154 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2155 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2156 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2157 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2158 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2159 		gb_addr_config &= ~0xf3e777ff;
2160 		gb_addr_config |= 0x22014042;
2161 		break;
2162 	case IP_VERSION(9, 3, 0):
2163 		adev->gfx.config.max_hw_contexts = 8;
2164 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2165 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2166 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2167 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2168 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2169 		gb_addr_config &= ~0xf3e777ff;
2170 		gb_addr_config |= 0x22010042;
2171 		break;
2172 	case IP_VERSION(9, 4, 2):
2173 		adev->gfx.ras = &gfx_v9_4_2_ras;
2174 		adev->gfx.config.max_hw_contexts = 8;
2175 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2176 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2177 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2178 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2179 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2180 		gb_addr_config &= ~0xf3e777ff;
2181 		gb_addr_config |= 0x22014042;
2182 		/* check vbios table if gpu info is not available */
2183 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2184 		if (err)
2185 			return err;
2186 		break;
2187 	default:
2188 		BUG();
2189 		break;
2190 	}
2191 
2192 	if (adev->gfx.ras) {
2193 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2194 		if (err) {
2195 			DRM_ERROR("Failed to register gfx ras block!\n");
2196 			return err;
2197 		}
2198 
2199 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2200 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2201 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2202 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2203 
2204 		/* If no special ras_late_init function is defined, use the gfx default ras_late_init */
2205 		if (!adev->gfx.ras->ras_block.ras_late_init)
2206 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2207 
2208 		/* If no special ras_cb function is defined, use the default ras_cb */
2209 		if (!adev->gfx.ras->ras_block.ras_cb)
2210 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2211 	}
2212 
2213 	adev->gfx.config.gb_addr_config = gb_addr_config;
2214 
2215 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2216 			REG_GET_FIELD(
2217 					adev->gfx.config.gb_addr_config,
2218 					GB_ADDR_CONFIG,
2219 					NUM_PIPES);
2220 
2221 	adev->gfx.config.max_tile_pipes =
2222 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2223 
2224 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2225 			REG_GET_FIELD(
2226 					adev->gfx.config.gb_addr_config,
2227 					GB_ADDR_CONFIG,
2228 					NUM_BANKS);
2229 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2230 			REG_GET_FIELD(
2231 					adev->gfx.config.gb_addr_config,
2232 					GB_ADDR_CONFIG,
2233 					MAX_COMPRESSED_FRAGS);
2234 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2235 			REG_GET_FIELD(
2236 					adev->gfx.config.gb_addr_config,
2237 					GB_ADDR_CONFIG,
2238 					NUM_RB_PER_SE);
2239 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2240 			REG_GET_FIELD(
2241 					adev->gfx.config.gb_addr_config,
2242 					GB_ADDR_CONFIG,
2243 					NUM_SHADER_ENGINES);
2244 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2245 			REG_GET_FIELD(
2246 					adev->gfx.config.gb_addr_config,
2247 					GB_ADDR_CONFIG,
2248 					PIPE_INTERLEAVE_SIZE));
2249 
2250 	return 0;
2251 }
2252 
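/*
 * Initialize one compute ring: map it to a MEC/pipe/queue, assign its
 * doorbell and EOP address, and attach it to the matching EOP interrupt
 * source with the appropriate hardware priority.
 */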
2253 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2254 				      int mec, int pipe, int queue)
2255 {
2256 	unsigned irq_type;
2257 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2258 	unsigned int hw_prio;
2259 
2261 
2262 	/* mec0 is me1 */
2263 	ring->me = mec + 1;
2264 	ring->pipe = pipe;
2265 	ring->queue = queue;
2266 
2267 	ring->ring_obj = NULL;
2268 	ring->use_doorbell = true;
2269 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2270 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2271 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2272 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2273 
2274 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2275 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2276 		+ ring->pipe;
2277 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2278 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2279 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2280 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2281 				hw_prio, NULL);
2282 }
2283 
2284 static int gfx_v9_0_sw_init(void *handle)
2285 {
2286 	int i, j, k, r, ring_id;
2287 	struct amdgpu_ring *ring;
2288 	struct amdgpu_kiq *kiq;
2289 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2290 
2291 	switch (adev->ip_versions[GC_HWIP][0]) {
2292 	case IP_VERSION(9, 0, 1):
2293 	case IP_VERSION(9, 2, 1):
2294 	case IP_VERSION(9, 4, 0):
2295 	case IP_VERSION(9, 2, 2):
2296 	case IP_VERSION(9, 1, 0):
2297 	case IP_VERSION(9, 4, 1):
2298 	case IP_VERSION(9, 3, 0):
2299 	case IP_VERSION(9, 4, 2):
2300 		adev->gfx.mec.num_mec = 2;
2301 		break;
2302 	default:
2303 		adev->gfx.mec.num_mec = 1;
2304 		break;
2305 	}
2306 
2307 	adev->gfx.mec.num_pipe_per_mec = 4;
2308 	adev->gfx.mec.num_queue_per_pipe = 8;
2309 
2310 	/* EOP Event */
2311 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2312 	if (r)
2313 		return r;
2314 
2315 	/* Privileged reg */
2316 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2317 			      &adev->gfx.priv_reg_irq);
2318 	if (r)
2319 		return r;
2320 
2321 	/* Privileged inst */
2322 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2323 			      &adev->gfx.priv_inst_irq);
2324 	if (r)
2325 		return r;
2326 
2327 	/* ECC error */
2328 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2329 			      &adev->gfx.cp_ecc_error_irq);
2330 	if (r)
2331 		return r;
2332 
2333 	/* FUE error */
2334 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2335 			      &adev->gfx.cp_ecc_error_irq);
2336 	if (r)
2337 		return r;
2338 
2339 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2340 
2341 	gfx_v9_0_scratch_init(adev);
2342 
2343 	r = gfx_v9_0_init_microcode(adev);
2344 	if (r) {
2345 		DRM_ERROR("Failed to load gfx firmware!\n");
2346 		return r;
2347 	}
2348 
2349 	if (adev->gfx.rlc.funcs) {
2350 		if (adev->gfx.rlc.funcs->init) {
2351 			r = adev->gfx.rlc.funcs->init(adev);
2352 			if (r) {
2353 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2354 				return r;
2355 			}
2356 		}
2357 	}
2358 
2359 	r = gfx_v9_0_mec_init(adev);
2360 	if (r) {
2361 		DRM_ERROR("Failed to init MEC BOs!\n");
2362 		return r;
2363 	}
2364 
2365 	/* set up the gfx ring */
2366 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2367 		ring = &adev->gfx.gfx_ring[i];
2368 		ring->ring_obj = NULL;
2369 		if (!i)
2370 			sprintf(ring->name, "gfx");
2371 		else
2372 			sprintf(ring->name, "gfx_%d", i);
2373 		ring->use_doorbell = true;
2374 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2375 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2376 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2377 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2378 		if (r)
2379 			return r;
2380 	}
2381 
2382 	/* set up the compute queues - allocate horizontally across pipes */
2383 	ring_id = 0;
2384 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2385 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2386 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2387 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2388 					continue;
2389 
2390 				r = gfx_v9_0_compute_ring_init(adev,
2391 							       ring_id,
2392 							       i, k, j);
2393 				if (r)
2394 					return r;
2395 
2396 				ring_id++;
2397 			}
2398 		}
2399 	}
2400 
2401 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2402 	if (r) {
2403 		DRM_ERROR("Failed to init KIQ BOs!\n");
2404 		return r;
2405 	}
2406 
2407 	kiq = &adev->gfx.kiq;
2408 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2409 	if (r)
2410 		return r;
2411 
2412 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2413 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2414 	if (r)
2415 		return r;
2416 
2417 	adev->gfx.ce_ram_size = 0x8000;
2418 
2419 	r = gfx_v9_0_gpu_early_init(adev);
2420 	if (r)
2421 		return r;
2422 
2423 	return 0;
2424 }
2425 
2427 static int gfx_v9_0_sw_fini(void *handle)
2428 {
2429 	int i;
2430 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2431 
2432 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2433 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2434 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2435 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2436 
2437 	amdgpu_gfx_mqd_sw_fini(adev);
2438 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2439 	amdgpu_gfx_kiq_fini(adev);
2440 
2441 	gfx_v9_0_mec_fini(adev);
2442 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2443 				&adev->gfx.rlc.clear_state_gpu_addr,
2444 				(void **)&adev->gfx.rlc.cs_ptr);
2445 	if (adev->flags & AMD_IS_APU) {
2446 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2447 				&adev->gfx.rlc.cp_table_gpu_addr,
2448 				(void **)&adev->gfx.rlc.cp_table_ptr);
2449 	}
2450 	gfx_v9_0_free_microcode(adev);
2451 
2452 	return 0;
2453 }
2454 
2456 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2457 {
2458 	/* TODO */
2459 }
2460 
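/*
 * Select a shader engine/array/instance via GRBM_GFX_INDEX; a field value
 * of 0xffffffff selects broadcast mode for that field.
 */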
2461 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2462 			   u32 instance)
2463 {
2464 	u32 data;
2465 
2466 	if (instance == 0xffffffff)
2467 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2468 	else
2469 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2470 
2471 	if (se_num == 0xffffffff)
2472 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2473 	else
2474 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2475 
2476 	if (sh_num == 0xffffffff)
2477 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2478 	else
2479 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2480 
2481 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2482 }
2483 
2484 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2485 {
2486 	u32 data, mask;
2487 
2488 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2489 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2490 
2491 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2492 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2493 
2494 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2495 					 adev->gfx.config.max_sh_per_se);
2496 
2497 	return (~data) & mask;
2498 }
2499 
2500 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2501 {
2502 	int i, j;
2503 	u32 data;
2504 	u32 active_rbs = 0;
2505 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2506 					adev->gfx.config.max_sh_per_se;
2507 
2508 	mutex_lock(&adev->grbm_idx_mutex);
2509 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2510 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2511 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2512 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2513 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2514 					       rb_bitmap_width_per_sh);
2515 		}
2516 	}
2517 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2518 	mutex_unlock(&adev->grbm_idx_mutex);
2519 
2520 	adev->gfx.config.backend_enable_mask = active_rbs;
2521 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2522 }
2523 
2524 #define DEFAULT_SH_MEM_BASES	(0x6000)
2525 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2526 {
2527 	int i;
2528 	uint32_t sh_mem_config;
2529 	uint32_t sh_mem_bases;
2530 
2531 	/*
2532 	 * Configure apertures:
2533 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2534 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2535 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2536 	 */
2537 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2538 
2539 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2540 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2541 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2542 
2543 	mutex_lock(&adev->srbm_mutex);
2544 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2545 		soc15_grbm_select(adev, 0, 0, 0, i);
2546 		/* CP and shaders */
2547 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2548 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2549 	}
2550 	soc15_grbm_select(adev, 0, 0, 0, 0);
2551 	mutex_unlock(&adev->srbm_mutex);
2552 
2553 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2554 	   access. These should be enabled by FW for target VMIDs. */
2555 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2556 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2557 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2558 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2559 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2560 	}
2561 }
2562 
2563 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2564 {
2565 	int vmid;
2566 
2567 	/*
2568 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2569 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2570 	 * the driver can enable them for graphics. VMID0 should maintain
2571 	 * access so that HWS firmware can save/restore entries.
2572 	 */
2573 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2574 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2575 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2576 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2577 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2578 	}
2579 }
2580 
2581 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2582 {
2583 	uint32_t tmp;
2584 
2585 	switch (adev->ip_versions[GC_HWIP][0]) {
2586 	case IP_VERSION(9, 4, 1):
2587 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2588 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2589 					DISABLE_BARRIER_WAITCNT, 1);
2590 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2591 		break;
2592 	default:
2593 		break;
2594 	}
2595 }
2596 
2597 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2598 {
2599 	u32 tmp;
2600 	int i;
2601 
2602 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2603 
2604 	gfx_v9_0_tiling_mode_table_init(adev);
2605 
2606 	gfx_v9_0_setup_rb(adev);
2607 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2608 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2609 
2610 	/* XXX SH_MEM regs */
2611 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2612 	mutex_lock(&adev->srbm_mutex);
2613 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2614 		soc15_grbm_select(adev, 0, 0, 0, i);
2615 		/* CP and shaders */
2616 		if (i == 0) {
2617 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2618 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2619 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2620 					    !!adev->gmc.noretry);
2621 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2622 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2623 		} else {
2624 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2625 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2626 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2627 					    !!adev->gmc.noretry);
2628 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2629 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2630 				(adev->gmc.private_aperture_start >> 48));
2631 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2632 				(adev->gmc.shared_aperture_start >> 48));
2633 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2634 		}
2635 	}
2636 	soc15_grbm_select(adev, 0, 0, 0, 0);
2637 
2638 	mutex_unlock(&adev->srbm_mutex);
2639 
2640 	gfx_v9_0_init_compute_vmid(adev);
2641 	gfx_v9_0_init_gds_vmid(adev);
2642 	gfx_v9_0_init_sq_config(adev);
2643 }
2644 
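/*
 * Poll until the RLC serdes CU masters report idle on every SE/SH, then
 * wait for the non-CU masters, giving up after adev->usec_timeout
 * iterations.
 */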
2645 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2646 {
2647 	u32 i, j, k;
2648 	u32 mask;
2649 
2650 	mutex_lock(&adev->grbm_idx_mutex);
2651 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2652 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2653 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2654 			for (k = 0; k < adev->usec_timeout; k++) {
2655 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2656 					break;
2657 				udelay(1);
2658 			}
2659 			if (k == adev->usec_timeout) {
2660 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2661 						      0xffffffff, 0xffffffff);
2662 				mutex_unlock(&adev->grbm_idx_mutex);
2663 				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2664 					 i, j);
2665 				return;
2666 			}
2667 		}
2668 	}
2669 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2670 	mutex_unlock(&adev->grbm_idx_mutex);
2671 
2672 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2673 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2674 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2675 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2676 	for (k = 0; k < adev->usec_timeout; k++) {
2677 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2678 			break;
2679 		udelay(1);
2680 	}
2681 }
2682 
2683 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2684 					       bool enable)
2685 {
2686 	u32 tmp;
2687 
2688 	/* These interrupts should be enabled to drive DS clock */
2689 
2690 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2691 
2692 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2693 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2694 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2695 	if (adev->gfx.num_gfx_rings)
2696 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2697 
2698 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2699 }
2700 
2701 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2702 {
2703 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2704 	/* csib */
2705 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2706 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2707 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2708 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2709 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2710 			adev->gfx.rlc.clear_state_size);
2711 }
2712 
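/*
 * Walk the indirect portion of the RLC register list format: record the
 * start offset of each indirect block and collect the unique indirect
 * register offsets it references.
 */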
2713 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2714 				int indirect_offset,
2715 				int list_size,
2716 				int *unique_indirect_regs,
2717 				int unique_indirect_reg_count,
2718 				int *indirect_start_offsets,
2719 				int *indirect_start_offsets_count,
2720 				int max_start_offsets_count)
2721 {
2722 	int idx;
2723 
2724 	for (; indirect_offset < list_size; indirect_offset++) {
2725 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2726 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2727 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2728 
2729 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2730 			indirect_offset += 2;
2731 
2732 			/* look for the matching index */
2733 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2734 				if (unique_indirect_regs[idx] ==
2735 					register_list_format[indirect_offset] ||
2736 					!unique_indirect_regs[idx])
2737 					break;
2738 			}
2739 
2740 			BUG_ON(idx >= unique_indirect_reg_count);
2741 
2742 			if (!unique_indirect_regs[idx])
2743 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2744 
2745 			indirect_offset++;
2746 		}
2747 	}
2748 }
2749 
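/*
 * Program the RLC save/restore machinery: upload the register restore
 * table to RLC SRM ARAM, write the direct and indirect register list to
 * RLC GPM scratch, and set up the indirect index control registers.
 */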
2750 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2751 {
2752 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2753 	int unique_indirect_reg_count = 0;
2754 
2755 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2756 	int indirect_start_offsets_count = 0;
2757 
2758 	int list_size = 0;
2759 	int i = 0, j = 0;
2760 	u32 tmp = 0;
2761 
2762 	u32 *register_list_format =
2763 		kmemdup(adev->gfx.rlc.register_list_format,
2764 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2765 	if (!register_list_format)
2766 		return -ENOMEM;
2767 
2768 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2769 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2770 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2771 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2772 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2773 				    unique_indirect_regs,
2774 				    unique_indirect_reg_count,
2775 				    indirect_start_offsets,
2776 				    &indirect_start_offsets_count,
2777 				    ARRAY_SIZE(indirect_start_offsets));
2778 
2779 	/* enable auto inc in case it is disabled */
2780 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2781 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2782 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2783 
2784 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2785 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2786 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2787 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2788 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2789 			adev->gfx.rlc.register_restore[i]);
2790 
2791 	/* load indirect register */
2792 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2793 		adev->gfx.rlc.reg_list_format_start);
2794 
2795 	/* direct register portion */
2796 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2797 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2798 			register_list_format[i]);
2799 
2800 	/* indirect register portion */
2801 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2802 		if (register_list_format[i] == 0xFFFFFFFF) {
2803 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2804 			continue;
2805 		}
2806 
2807 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2808 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2809 
2810 		for (j = 0; j < unique_indirect_reg_count; j++) {
2811 			if (register_list_format[i] == unique_indirect_regs[j]) {
2812 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2813 				break;
2814 			}
2815 		}
2816 
2817 		BUG_ON(j >= unique_indirect_reg_count);
2818 
2819 		i++;
2820 	}
2821 
2822 	/* set save/restore list size */
2823 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2824 	list_size = list_size >> 1;
2825 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2826 		adev->gfx.rlc.reg_restore_list_size);
2827 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2828 
2829 	/* write the starting offsets to RLC scratch ram */
2830 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2831 		adev->gfx.rlc.starting_offsets_start);
2832 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2833 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2834 		       indirect_start_offsets[i]);
2835 
2836 	/* load unique indirect regs */
2837 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2838 		if (unique_indirect_regs[i] != 0) {
2839 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2840 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2841 			       unique_indirect_regs[i] & 0x3FFFF);
2842 
2843 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2844 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2845 			       unique_indirect_regs[i] >> 20);
2846 		}
2847 	}
2848 
2849 	kfree(register_list_format);
2850 	return 0;
2851 }
2852 
2853 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2854 {
2855 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2856 }
2857 
2858 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2859 					     bool enable)
2860 {
2861 	uint32_t data = 0;
2862 	uint32_t default_data = 0;
2863 
2864 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2865 	if (enable) {
2866 		/* enable GFXIP control over CGPG */
2867 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2868 		if (default_data != data)
2869 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2870 
2871 		/* update status */
2872 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2873 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2874 		if (default_data != data)
2875 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2876 	} else {
2877 		/* restore GFXIP control over CGPG */
2878 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2879 		if (default_data != data)
2880 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2881 	}
2882 }
2883 
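/*
 * Static GFX power-gating setup: program the idle poll count, the RLC
 * power up/down/propagate/memory-sleep delays and the GFX idle
 * threshold that triggers the GRBM register save.
 */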
2884 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2885 {
2886 	uint32_t data = 0;
2887 
2888 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2889 			      AMD_PG_SUPPORT_GFX_SMG |
2890 			      AMD_PG_SUPPORT_GFX_DMG)) {
2891 		/* init IDLE_POLL_COUNT = 60 */
2892 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2893 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2894 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2895 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2896 
2897 		/* init RLC PG Delay */
2898 		data = 0;
2899 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2900 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2901 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2902 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2903 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2904 
2905 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2906 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2907 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2908 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2909 
2910 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2911 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2912 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2913 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2914 
2915 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2916 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2917 
2918 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2919 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2920 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2921 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2922 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2923 	}
2924 }
2925 
2926 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2927 						bool enable)
2928 {
2929 	uint32_t data = 0;
2930 	uint32_t default_data = 0;
2931 
2932 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2933 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2934 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2935 			     enable ? 1 : 0);
2936 	if (default_data != data)
2937 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2938 }
2939 
2940 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2941 						bool enable)
2942 {
2943 	uint32_t data = 0;
2944 	uint32_t default_data = 0;
2945 
2946 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2947 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2948 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2949 			     enable ? 1 : 0);
2950 	if (default_data != data)
2951 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2952 }
2953 
2954 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2955 					bool enable)
2956 {
2957 	uint32_t data = 0;
2958 	uint32_t default_data = 0;
2959 
2960 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2961 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2962 			     CP_PG_DISABLE,
2963 			     enable ? 0 : 1);
2964 	if (default_data != data)
2965 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2966 }
2967 
2968 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2969 						bool enable)
2970 {
2971 	uint32_t data, default_data;
2972 
2973 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2974 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2975 			     GFX_POWER_GATING_ENABLE,
2976 			     enable ? 1 : 0);
2977 	if (default_data != data)
2978 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2979 }
2980 
2981 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2982 						bool enable)
2983 {
2984 	uint32_t data, default_data;
2985 
2986 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2987 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2988 			     GFX_PIPELINE_PG_ENABLE,
2989 			     enable ? 1 : 0);
2990 	if (default_data != data)
2991 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2992 
2993 	if (!enable)
2994 		/* read any GFX register to wake up GFX */
2995 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2996 }
2997 
2998 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2999 						       bool enable)
3000 {
3001 	uint32_t data, default_data;
3002 
3003 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3004 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3005 			     STATIC_PER_CU_PG_ENABLE,
3006 			     enable ? 1 : 0);
3007 	if (default_data != data)
3008 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3009 }
3010 
3011 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3012 						bool enable)
3013 {
3014 	uint32_t data, default_data;
3015 
3016 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3017 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3018 			     DYN_PER_CU_PG_ENABLE,
3019 			     enable ? 1 : 0);
3020 	if (default_data != data)
3021 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3022 }
3023 
3024 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3025 {
3026 	gfx_v9_0_init_csb(adev);
3027 
3028 	/*
3029 	 * The RLC save/restore list is supported from RLC v2_1 onward,
3030 	 * and it is needed by the gfxoff feature.
3031 	 */
3032 	if (adev->gfx.rlc.is_rlc_v2_1) {
3033 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3034 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3035 			gfx_v9_1_init_rlc_save_restore_list(adev);
3036 		gfx_v9_0_enable_save_restore_machine(adev);
3037 	}
3038 
3039 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3040 			      AMD_PG_SUPPORT_GFX_SMG |
3041 			      AMD_PG_SUPPORT_GFX_DMG |
3042 			      AMD_PG_SUPPORT_CP |
3043 			      AMD_PG_SUPPORT_GDS |
3044 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3045 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3046 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3047 		gfx_v9_0_init_gfx_power_gating(adev);
3048 	}
3049 }
3050 
3051 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3052 {
3053 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3054 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3055 	gfx_v9_0_wait_for_rlc_serdes(adev);
3056 }
3057 
3058 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3059 {
3060 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3061 	udelay(50);
3062 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3063 	udelay(50);
3064 }
3065 
3066 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3067 {
3068 #ifdef AMDGPU_RLC_DEBUG_RETRY
3069 	u32 rlc_ucode_ver;
3070 #endif
3071 
3072 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3073 	udelay(50);
3074 
3075 	/* carrizo enables the cp interrupt only after the cp has been initialized */
3076 	if (!(adev->flags & AMD_IS_APU)) {
3077 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3078 		udelay(50);
3079 	}
3080 
3081 #ifdef AMDGPU_RLC_DEBUG_RETRY
3082 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3083 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3084 	if (rlc_ucode_ver == 0x108) {
3085 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3086 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3087 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3088 		 * default is 0x9C4 to create a 100us interval */
3089 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3090 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3091 		 * to disable the page fault retry interrupts, default is
3092 		 * 0x100 (256) */
3093 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3094 	}
3095 #endif
3096 }
3097 
3098 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3099 {
3100 	const struct rlc_firmware_header_v2_0 *hdr;
3101 	const __le32 *fw_data;
3102 	unsigned i, fw_size;
3103 
3104 	if (!adev->gfx.rlc_fw)
3105 		return -EINVAL;
3106 
3107 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3108 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3109 
3110 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3111 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3112 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3113 
3114 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3115 			RLCG_UCODE_LOADING_START_ADDRESS);
3116 	for (i = 0; i < fw_size; i++)
3117 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3118 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3119 
3120 	return 0;
3121 }
3122 
3123 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3124 {
3125 	int r;
3126 
3127 	if (amdgpu_sriov_vf(adev)) {
3128 		gfx_v9_0_init_csb(adev);
3129 		return 0;
3130 	}
3131 
3132 	adev->gfx.rlc.funcs->stop(adev);
3133 
3134 	/* disable CG */
3135 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3136 
3137 	gfx_v9_0_init_pg(adev);
3138 
3139 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3140 		/* legacy rlc firmware loading */
3141 		r = gfx_v9_0_rlc_load_microcode(adev);
3142 		if (r)
3143 			return r;
3144 	}
3145 
3146 	switch (adev->ip_versions[GC_HWIP][0]) {
3147 	case IP_VERSION(9, 2, 2):
3148 	case IP_VERSION(9, 1, 0):
3149 		if (amdgpu_lbpw == 0)
3150 			gfx_v9_0_enable_lbpw(adev, false);
3151 		else
3152 			gfx_v9_0_enable_lbpw(adev, true);
3153 		break;
3154 	case IP_VERSION(9, 4, 0):
3155 		if (amdgpu_lbpw > 0)
3156 			gfx_v9_0_enable_lbpw(adev, true);
3157 		else
3158 			gfx_v9_0_enable_lbpw(adev, false);
3159 		break;
3160 	default:
3161 		break;
3162 	}
3163 
3164 	adev->gfx.rlc.funcs->start(adev);
3165 
3166 	return 0;
3167 }
3168 
3169 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3170 {
3171 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3172 
3173 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3174 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3175 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3176 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3177 	udelay(50);
3178 }
3179 
3180 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3181 {
3182 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3183 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3184 	const struct gfx_firmware_header_v1_0 *me_hdr;
3185 	const __le32 *fw_data;
3186 	unsigned i, fw_size;
3187 
3188 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3189 		return -EINVAL;
3190 
3191 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3192 		adev->gfx.pfp_fw->data;
3193 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3194 		adev->gfx.ce_fw->data;
3195 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3196 		adev->gfx.me_fw->data;
3197 
3198 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3199 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3200 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3201 
3202 	gfx_v9_0_cp_gfx_enable(adev, false);
3203 
3204 	/* PFP */
3205 	fw_data = (const __le32 *)
3206 		(adev->gfx.pfp_fw->data +
3207 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3208 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3209 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3210 	for (i = 0; i < fw_size; i++)
3211 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3212 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3213 
3214 	/* CE */
3215 	fw_data = (const __le32 *)
3216 		(adev->gfx.ce_fw->data +
3217 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3218 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3219 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3220 	for (i = 0; i < fw_size; i++)
3221 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3222 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3223 
3224 	/* ME */
3225 	fw_data = (const __le32 *)
3226 		(adev->gfx.me_fw->data +
3227 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3228 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3229 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3230 	for (i = 0; i < fw_size; i++)
3231 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3232 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3233 
3234 	return 0;
3235 }
3236 
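/*
 * Bring up the gfx CP: emit the preamble/clear-state control packets
 * and the golden context state (the SECT_CONTEXT extents from
 * gfx9_cs_data) on gfx ring 0, then commit the ring.
 */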
3237 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3238 {
3239 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3240 	const struct cs_section_def *sect = NULL;
3241 	const struct cs_extent_def *ext = NULL;
3242 	int r, i, tmp;
3243 
3244 	/* init the CP */
3245 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3246 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3247 
3248 	gfx_v9_0_cp_gfx_enable(adev, true);
3249 
3250 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3251 	if (r) {
3252 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3253 		return r;
3254 	}
3255 
3256 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3257 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3258 
3259 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3260 	amdgpu_ring_write(ring, 0x80000000);
3261 	amdgpu_ring_write(ring, 0x80000000);
3262 
3263 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3264 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3265 			if (sect->id == SECT_CONTEXT) {
3266 				amdgpu_ring_write(ring,
3267 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3268 					       ext->reg_count));
3269 				amdgpu_ring_write(ring,
3270 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3271 				for (i = 0; i < ext->reg_count; i++)
3272 					amdgpu_ring_write(ring, ext->extent[i]);
3273 			}
3274 		}
3275 	}
3276 
3277 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3278 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3279 
3280 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3281 	amdgpu_ring_write(ring, 0);
3282 
3283 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3284 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3285 	amdgpu_ring_write(ring, 0x8000);
3286 	amdgpu_ring_write(ring, 0x8000);
3287 
3288 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3289 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3290 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3291 	amdgpu_ring_write(ring, tmp);
3292 	amdgpu_ring_write(ring, 0);
3293 
3294 	amdgpu_ring_commit(ring);
3295 
3296 	return 0;
3297 }
3298 
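/*
 * Program the gfx ring buffer (CP_RB0_*): size, rptr/wptr write-back
 * addresses, base address and the doorbell range, then kick the ring
 * off via gfx_v9_0_cp_gfx_start().
 */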
3299 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3300 {
3301 	struct amdgpu_ring *ring;
3302 	u32 tmp;
3303 	u32 rb_bufsz;
3304 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3305 
3306 	/* Set the write pointer delay */
3307 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3308 
3309 	/* set the RB to use vmid 0 */
3310 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3311 
3312 	/* Set ring buffer size */
3313 	ring = &adev->gfx.gfx_ring[0];
3314 	rb_bufsz = order_base_2(ring->ring_size / 8);
3315 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3316 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3317 #ifdef __BIG_ENDIAN
3318 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3319 #endif
3320 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3321 
3322 	/* Initialize the ring buffer's write pointers */
3323 	ring->wptr = 0;
3324 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3325 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3326 
3327 	/* set the wb address whether it's enabled or not */
3328 	rptr_addr = ring->rptr_gpu_addr;
3329 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3330 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3331 
3332 	wptr_gpu_addr = ring->wptr_gpu_addr;
3333 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3334 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3335 
3336 	mdelay(1);
3337 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3338 
3339 	rb_addr = ring->gpu_addr >> 8;
3340 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3341 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3342 
3343 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3344 	if (ring->use_doorbell) {
3345 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3346 				    DOORBELL_OFFSET, ring->doorbell_index);
3347 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3348 				    DOORBELL_EN, 1);
3349 	} else {
3350 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3351 	}
3352 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3353 
3354 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3355 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3356 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3357 
3358 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3359 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3360 
3361 
3362 	/* start the ring */
3363 	gfx_v9_0_cp_gfx_start(adev);
3364 	ring->sched.ready = true;
3365 
3366 	return 0;
3367 }
3368 
3369 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3370 {
3371 	if (enable) {
3372 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3373 	} else {
3374 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3375 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3376 		adev->gfx.kiq.ring.sched.ready = false;
3377 	}
3378 	udelay(50);
3379 }
3380 
3381 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3382 {
3383 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3384 	const __le32 *fw_data;
3385 	unsigned i;
3386 	u32 tmp;
3387 
3388 	if (!adev->gfx.mec_fw)
3389 		return -EINVAL;
3390 
3391 	gfx_v9_0_cp_compute_enable(adev, false);
3392 
3393 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3394 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3395 
3396 	fw_data = (const __le32 *)
3397 		(adev->gfx.mec_fw->data +
3398 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3399 	tmp = 0;
3400 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3401 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3402 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3403 
3404 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3405 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3406 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3407 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3408 
3409 	/* MEC1 */
3410 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3411 			 mec_hdr->jt_offset);
3412 	for (i = 0; i < mec_hdr->jt_size; i++)
3413 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3414 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3415 
3416 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3417 			adev->gfx.mec_fw_version);
3418 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3419 
3420 	return 0;
3421 }
3422 
3423 /* KIQ functions */
3424 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3425 {
3426 	uint32_t tmp;
3427 	struct amdgpu_device *adev = ring->adev;
3428 
3429 	/* tell the RLC which queue is the KIQ queue */
3430 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3431 	tmp &= 0xffffff00;
3432 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3433 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3434 	tmp |= 0x80;
3435 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3436 }
3437 
3438 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3439 {
3440 	struct amdgpu_device *adev = ring->adev;
3441 
3442 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3443 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3444 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3445 			mqd->cp_hqd_queue_priority =
3446 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3447 		}
3448 	}
3449 }
3450 
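/*
 * Fill in the memory queue descriptor (MQD) for a compute or KIQ ring.
 * The fields mirror the CP_HQD_* registers; they are either consumed by
 * the CP when the queue is mapped or written back to the registers by
 * gfx_v9_0_kiq_init_register().
 */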
3451 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3452 {
3453 	struct amdgpu_device *adev = ring->adev;
3454 	struct v9_mqd *mqd = ring->mqd_ptr;
3455 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3456 	uint32_t tmp;
3457 
3458 	mqd->header = 0xC0310800;
3459 	mqd->compute_pipelinestat_enable = 0x00000001;
3460 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3461 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3462 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3463 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3464 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3465 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3466 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3467 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3468 	mqd->compute_misc_reserved = 0x00000003;
3469 
3470 	mqd->dynamic_cu_mask_addr_lo =
3471 		lower_32_bits(ring->mqd_gpu_addr
3472 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3473 	mqd->dynamic_cu_mask_addr_hi =
3474 		upper_32_bits(ring->mqd_gpu_addr
3475 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3476 
3477 	eop_base_addr = ring->eop_gpu_addr >> 8;
3478 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3479 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3480 
3481 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3482 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3483 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3484 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3485 
3486 	mqd->cp_hqd_eop_control = tmp;
3487 
3488 	/* enable doorbell? */
3489 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3490 
3491 	if (ring->use_doorbell) {
3492 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3493 				    DOORBELL_OFFSET, ring->doorbell_index);
3494 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3495 				    DOORBELL_EN, 1);
3496 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3497 				    DOORBELL_SOURCE, 0);
3498 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3499 				    DOORBELL_HIT, 0);
3500 	} else {
3501 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502 					 DOORBELL_EN, 0);
3503 	}
3504 
3505 	mqd->cp_hqd_pq_doorbell_control = tmp;
3506 
3507 	/* disable the queue if it's active */
3508 	ring->wptr = 0;
3509 	mqd->cp_hqd_dequeue_request = 0;
3510 	mqd->cp_hqd_pq_rptr = 0;
3511 	mqd->cp_hqd_pq_wptr_lo = 0;
3512 	mqd->cp_hqd_pq_wptr_hi = 0;
3513 
3514 	/* set the pointer to the MQD */
3515 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3516 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3517 
3518 	/* set MQD vmid to 0 */
3519 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3520 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3521 	mqd->cp_mqd_control = tmp;
3522 
3523 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3524 	hqd_gpu_addr = ring->gpu_addr >> 8;
3525 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3526 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3527 
3528 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3529 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3530 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3531 			    (order_base_2(ring->ring_size / 4) - 1));
3532 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3533 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3534 #ifdef __BIG_ENDIAN
3535 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3536 #endif
3537 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3538 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3539 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3540 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3541 	mqd->cp_hqd_pq_control = tmp;
3542 
3543 	/* set the wb address whether it's enabled or not */
3544 	wb_gpu_addr = ring->rptr_gpu_addr;
3545 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3546 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3547 		upper_32_bits(wb_gpu_addr) & 0xffff;
3548 
3549 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3550 	wb_gpu_addr = ring->wptr_gpu_addr;
3551 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3552 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3553 
3554 	tmp = 0;
3555 	/* enable the doorbell if requested */
3556 	if (ring->use_doorbell) {
3557 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3558 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3559 				DOORBELL_OFFSET, ring->doorbell_index);
3560 
3561 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3562 					 DOORBELL_EN, 1);
3563 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3564 					 DOORBELL_SOURCE, 0);
3565 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3566 					 DOORBELL_HIT, 0);
3567 	}
3568 
3569 	mqd->cp_hqd_pq_doorbell_control = tmp;
3570 
3571 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3572 	ring->wptr = 0;
3573 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3574 
3575 	/* set the vmid for the queue */
3576 	mqd->cp_hqd_vmid = 0;
3577 
3578 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3579 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3580 	mqd->cp_hqd_persistent_state = tmp;
3581 
3582 	/* set MIN_IB_AVAIL_SIZE */
3583 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3584 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3585 	mqd->cp_hqd_ib_control = tmp;
3586 
3587 	/* set static priority for a queue/ring */
3588 	gfx_v9_0_mqd_set_priority(ring, mqd);
3589 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3590 
3591 	/* the map_queues packet doesn't need to activate the queue,
3592 	 * so only the kiq needs to set this field.
3593 	 */
3594 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3595 		mqd->cp_hqd_active = 1;
3596 
3597 	return 0;
3598 }
3599 
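/*
 * Write the MQD contents straight into the CP_HQD_* registers for the
 * KIQ. Callers hold srbm_mutex and select the ring's me/pipe/queue via
 * soc15_grbm_select() before calling this.
 */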
3600 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3601 {
3602 	struct amdgpu_device *adev = ring->adev;
3603 	struct v9_mqd *mqd = ring->mqd_ptr;
3604 	int j;
3605 
3606 	/* disable wptr polling */
3607 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3608 
3609 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3610 	       mqd->cp_hqd_eop_base_addr_lo);
3611 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3612 	       mqd->cp_hqd_eop_base_addr_hi);
3613 
3614 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3615 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3616 	       mqd->cp_hqd_eop_control);
3617 
3618 	/* enable doorbell? */
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3620 	       mqd->cp_hqd_pq_doorbell_control);
3621 
3622 	/* disable the queue if it's active */
3623 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3624 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3625 		for (j = 0; j < adev->usec_timeout; j++) {
3626 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3627 				break;
3628 			udelay(1);
3629 		}
3630 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3631 		       mqd->cp_hqd_dequeue_request);
3632 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3633 		       mqd->cp_hqd_pq_rptr);
3634 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3635 		       mqd->cp_hqd_pq_wptr_lo);
3636 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3637 		       mqd->cp_hqd_pq_wptr_hi);
3638 	}
3639 
3640 	/* set the pointer to the MQD */
3641 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3642 	       mqd->cp_mqd_base_addr_lo);
3643 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3644 	       mqd->cp_mqd_base_addr_hi);
3645 
3646 	/* set MQD vmid to 0 */
3647 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3648 	       mqd->cp_mqd_control);
3649 
3650 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3651 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3652 	       mqd->cp_hqd_pq_base_lo);
3653 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3654 	       mqd->cp_hqd_pq_base_hi);
3655 
3656 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3657 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3658 	       mqd->cp_hqd_pq_control);
3659 
3660 	/* set the wb address whether it's enabled or not */
3661 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3662 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3663 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3664 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3665 
3666 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3667 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3668 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3670 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3671 
3672 	/* enable the doorbell if requested */
3673 	if (ring->use_doorbell) {
3674 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3675 					(adev->doorbell_index.kiq * 2) << 2);
3676 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3677 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3678 		 * around this issue. This change has to stay aligned with the
3679 		 * firmware update.
3680 		 */
3681 		if (check_if_enlarge_doorbell_range(adev))
3682 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3683 					(adev->doorbell.size - 4));
3684 		else
3685 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3686 					(adev->doorbell_index.userqueue_end * 2) << 2);
3687 	}
3688 
3689 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3690 	       mqd->cp_hqd_pq_doorbell_control);
3691 
3692 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3693 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3694 	       mqd->cp_hqd_pq_wptr_lo);
3695 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3696 	       mqd->cp_hqd_pq_wptr_hi);
3697 
3698 	/* set the vmid for the queue */
3699 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3700 
3701 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3702 	       mqd->cp_hqd_persistent_state);
3703 
3704 	/* activate the queue */
3705 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3706 	       mqd->cp_hqd_active);
3707 
3708 	if (ring->use_doorbell)
3709 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3710 
3711 	return 0;
3712 }
3713 
3714 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3715 {
3716 	struct amdgpu_device *adev = ring->adev;
3717 	int j;
3718 
3719 	/* disable the queue if it's active */
3720 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3721 
3722 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3723 
3724 		for (j = 0; j < adev->usec_timeout; j++) {
3725 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3726 				break;
3727 			udelay(1);
3728 		}
3729 
3730 		if (j == adev->usec_timeout) {
3731 			DRM_DEBUG("KIQ dequeue request failed.\n");
3732 
3733 			/* Manual disable if dequeue request times out */
3734 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3735 		}
3736 
3737 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3738 		      0);
3739 	}
3740 
3741 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3742 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3743 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3744 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3745 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3746 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3747 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3748 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3749 
3750 	return 0;
3751 }
3752 
3753 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3754 {
3755 	struct amdgpu_device *adev = ring->adev;
3756 	struct v9_mqd *mqd = ring->mqd_ptr;
3757 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3758 	struct v9_mqd *tmp_mqd;
3759 
3760 	gfx_v9_0_kiq_setting(ring);
3761 
3762 	/* The GPU could be in a bad state during probe and the driver may trigger
3763 	 * a reset after loading the SMU; in that case the mqd has not been
3764 	 * initialized and the driver needs to re-init it.
3765 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3766 	 */
3767 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3768 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3769 		/* for GPU_RESET case , reset MQD to a clean status */
3770 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3771 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3772 
3773 		/* reset ring buffer */
3774 		ring->wptr = 0;
3775 		amdgpu_ring_clear_ring(ring);
3776 
3777 		mutex_lock(&adev->srbm_mutex);
3778 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3779 		gfx_v9_0_kiq_init_register(ring);
3780 		soc15_grbm_select(adev, 0, 0, 0, 0);
3781 		mutex_unlock(&adev->srbm_mutex);
3782 	} else {
3783 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3784 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3785 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3786 		mutex_lock(&adev->srbm_mutex);
3787 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3788 		gfx_v9_0_mqd_init(ring);
3789 		gfx_v9_0_kiq_init_register(ring);
3790 		soc15_grbm_select(adev, 0, 0, 0, 0);
3791 		mutex_unlock(&adev->srbm_mutex);
3792 
3793 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3794 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3795 	}
3796 
3797 	return 0;
3798 }
3799 
3800 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3801 {
3802 	struct amdgpu_device *adev = ring->adev;
3803 	struct v9_mqd *mqd = ring->mqd_ptr;
3804 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3805 	struct v9_mqd *tmp_mqd;
3806 
3807 	/* As with the kiq init above, the driver needs to re-init the mqd if
3808 	 * mqd->cp_hqd_pq_control has not been initialized before.
3809 	 */
3810 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3811 
3812 	if (!tmp_mqd->cp_hqd_pq_control ||
3813 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3814 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3815 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3816 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3817 		mutex_lock(&adev->srbm_mutex);
3818 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3819 		gfx_v9_0_mqd_init(ring);
3820 		soc15_grbm_select(adev, 0, 0, 0, 0);
3821 		mutex_unlock(&adev->srbm_mutex);
3822 
3823 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3824 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3825 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3826 		/* reset MQD to a clean status */
3827 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3828 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3829 
3830 		/* reset ring buffer */
3831 		ring->wptr = 0;
3832 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3833 		amdgpu_ring_clear_ring(ring);
3834 	} else {
3835 		amdgpu_ring_clear_ring(ring);
3836 	}
3837 
3838 	return 0;
3839 }
3840 
3841 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3842 {
3843 	struct amdgpu_ring *ring;
3844 	int r;
3845 
3846 	ring = &adev->gfx.kiq.ring;
3847 
3848 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3849 	if (unlikely(r != 0))
3850 		return r;
3851 
3852 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3853 	if (unlikely(r != 0))
3854 		return r;
3855 
3856 	gfx_v9_0_kiq_init_queue(ring);
3857 	amdgpu_bo_kunmap(ring->mqd_obj);
3858 	ring->mqd_ptr = NULL;
3859 	amdgpu_bo_unreserve(ring->mqd_obj);
3860 	ring->sched.ready = true;
3861 	return 0;
3862 }
3863 
3864 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3865 {
3866 	struct amdgpu_ring *ring = NULL;
3867 	int r = 0, i;
3868 
3869 	gfx_v9_0_cp_compute_enable(adev, true);
3870 
3871 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3872 		ring = &adev->gfx.compute_ring[i];
3873 
3874 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3875 		if (unlikely(r != 0))
3876 			goto done;
3877 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3878 		if (!r) {
3879 			r = gfx_v9_0_kcq_init_queue(ring);
3880 			amdgpu_bo_kunmap(ring->mqd_obj);
3881 			ring->mqd_ptr = NULL;
3882 		}
3883 		amdgpu_bo_unreserve(ring->mqd_obj);
3884 		if (r)
3885 			goto done;
3886 	}
3887 
3888 	r = amdgpu_gfx_enable_kcq(adev);
3889 done:
3890 	return r;
3891 }
3892 
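/*
 * CP bring-up order: load the CP microcode on the non-PSP path, resume
 * the KIQ first, then the gfx ring, then the compute queues (mapped
 * through the KIQ), and finally run a ring test on each ring before
 * re-enabling the GUI idle interrupt.
 */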
3893 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3894 {
3895 	int r, i;
3896 	struct amdgpu_ring *ring;
3897 
3898 	if (!(adev->flags & AMD_IS_APU))
3899 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3900 
3901 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3902 		if (adev->gfx.num_gfx_rings) {
3903 			/* legacy firmware loading */
3904 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3905 			if (r)
3906 				return r;
3907 		}
3908 
3909 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3910 		if (r)
3911 			return r;
3912 	}
3913 
3914 	r = gfx_v9_0_kiq_resume(adev);
3915 	if (r)
3916 		return r;
3917 
3918 	if (adev->gfx.num_gfx_rings) {
3919 		r = gfx_v9_0_cp_gfx_resume(adev);
3920 		if (r)
3921 			return r;
3922 	}
3923 
3924 	r = gfx_v9_0_kcq_resume(adev);
3925 	if (r)
3926 		return r;
3927 
3928 	if (adev->gfx.num_gfx_rings) {
3929 		ring = &adev->gfx.gfx_ring[0];
3930 		r = amdgpu_ring_test_helper(ring);
3931 		if (r)
3932 			return r;
3933 	}
3934 
3935 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3936 		ring = &adev->gfx.compute_ring[i];
3937 		amdgpu_ring_test_helper(ring);
3938 	}
3939 
3940 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3941 
3942 	return 0;
3943 }
3944 
3945 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3946 {
3947 	u32 tmp;
3948 
3949 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3950 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3951 		return;
3952 
3953 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3954 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3955 				adev->df.hash_status.hash_64k);
3956 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3957 				adev->df.hash_status.hash_2m);
3958 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3959 				adev->df.hash_status.hash_1g);
3960 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3961 }
3962 
3963 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3964 {
3965 	if (adev->gfx.num_gfx_rings)
3966 		gfx_v9_0_cp_gfx_enable(adev, enable);
3967 	gfx_v9_0_cp_compute_enable(adev, enable);
3968 }
3969 
3970 static int gfx_v9_0_hw_init(void *handle)
3971 {
3972 	int r;
3973 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3974 
3975 	if (!amdgpu_sriov_vf(adev))
3976 		gfx_v9_0_init_golden_registers(adev);
3977 
3978 	gfx_v9_0_constants_init(adev);
3979 
3980 	gfx_v9_0_init_tcp_config(adev);
3981 
3982 	r = adev->gfx.rlc.funcs->resume(adev);
3983 	if (r)
3984 		return r;
3985 
3986 	r = gfx_v9_0_cp_resume(adev);
3987 	if (r)
3988 		return r;
3989 
3990 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3991 		gfx_v9_4_2_set_power_brake_sequence(adev);
3992 
3993 	return r;
3994 }
3995 
3996 static int gfx_v9_0_hw_fini(void *handle)
3997 {
3998 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3999 
4000 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4001 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4002 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4003 
4004 	/* if a RAS interrupt has been triggered, DF freeze and kcq disable would fail */
4005 	if (!amdgpu_ras_intr_triggered())
4006 		/* disable KCQ to avoid CPC touching memory that is no longer valid */
4007 		amdgpu_gfx_disable_kcq(adev);
4008 
4009 	if (amdgpu_sriov_vf(adev)) {
4010 		gfx_v9_0_cp_gfx_enable(adev, false);
4011 		/* must disable polling for SRIOV when hw is finished, otherwise
4012 		 * the CPC engine may keep fetching the WB address, which is already
4013 		 * invalid after sw is finished, and trigger a DMAR read error on
4014 		 * the hypervisor side.
4015 		 */
4016 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4017 		return 0;
4018 	}
4019 
4020 	/* Use the deinitialize sequence from CAIL when unbinding the device from
4021 	 * the driver, otherwise the KIQ hangs when binding back.
4022 	 */
4023 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4024 		mutex_lock(&adev->srbm_mutex);
4025 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4026 				adev->gfx.kiq.ring.pipe,
4027 				adev->gfx.kiq.ring.queue, 0);
4028 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4029 		soc15_grbm_select(adev, 0, 0, 0, 0);
4030 		mutex_unlock(&adev->srbm_mutex);
4031 	}
4032 
4033 	gfx_v9_0_cp_enable(adev, false);
4034 
4035 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4036 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4037 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4038 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4039 		return 0;
4040 	}
4041 
4042 	adev->gfx.rlc.funcs->stop(adev);
4043 	return 0;
4044 }
4045 
4046 static int gfx_v9_0_suspend(void *handle)
4047 {
4048 	return gfx_v9_0_hw_fini(handle);
4049 }
4050 
4051 static int gfx_v9_0_resume(void *handle)
4052 {
4053 	return gfx_v9_0_hw_init(handle);
4054 }
4055 
4056 static bool gfx_v9_0_is_idle(void *handle)
4057 {
4058 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4059 
4060 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4061 				GRBM_STATUS, GUI_ACTIVE))
4062 		return false;
4063 	else
4064 		return true;
4065 }
4066 
4067 static int gfx_v9_0_wait_for_idle(void *handle)
4068 {
4069 	unsigned i;
4070 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4071 
4072 	for (i = 0; i < adev->usec_timeout; i++) {
4073 		if (gfx_v9_0_is_idle(handle))
4074 			return 0;
4075 		udelay(1);
4076 	}
4077 	return -ETIMEDOUT;
4078 }
4079 
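/*
 * Inspect the GRBM busy bits to decide which blocks (CP, GFX, RLC) need
 * a soft reset, halt the RLC and the CP engines, then pulse the
 * selected bits in GRBM_SOFT_RESET.
 */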
4080 static int gfx_v9_0_soft_reset(void *handle)
4081 {
4082 	u32 grbm_soft_reset = 0;
4083 	u32 tmp;
4084 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4085 
4086 	/* GRBM_STATUS */
4087 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4088 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4089 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4090 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4091 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4092 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4093 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4094 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4095 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4096 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4097 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4098 	}
4099 
4100 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4101 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4102 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4103 	}
4104 
4105 	/* GRBM_STATUS2 */
4106 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4107 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4108 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4109 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4110 
4111 
4112 	if (grbm_soft_reset) {
4113 		/* stop the rlc */
4114 		adev->gfx.rlc.funcs->stop(adev);
4115 
4116 		if (adev->gfx.num_gfx_rings)
4117 			/* Disable GFX parsing/prefetching */
4118 			gfx_v9_0_cp_gfx_enable(adev, false);
4119 
4120 		/* Disable MEC parsing/prefetching */
4121 		gfx_v9_0_cp_compute_enable(adev, false);
4122 
4123 		if (grbm_soft_reset) {
4124 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4125 			tmp |= grbm_soft_reset;
4126 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4127 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4128 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4129 
4130 			udelay(50);
4131 
4132 			tmp &= ~grbm_soft_reset;
4133 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4134 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4135 		}
4136 
4137 		/* Wait a little for things to settle down */
4138 		udelay(50);
4139 	}
4140 	return 0;
4141 }
4142 
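/*
 * Read the GPU clock counter indirectly through the KIQ: a COPY_DATA
 * packet on the KIQ ring copies the counter into a write-back slot,
 * which is then read from the CPU once the fence signals. Used when
 * direct MMIO access is not safe (see gfx_v9_0_get_gpu_clock_counter()).
 */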
4143 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4144 {
4145 	signed long r, cnt = 0;
4146 	unsigned long flags;
4147 	uint32_t seq, reg_val_offs = 0;
4148 	uint64_t value = 0;
4149 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4150 	struct amdgpu_ring *ring = &kiq->ring;
4151 
4152 	BUG_ON(!ring->funcs->emit_rreg);
4153 
4154 	spin_lock_irqsave(&kiq->ring_lock, flags);
4155 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4156 		pr_err("critical bug! too many kiq readers\n");
4157 		goto failed_unlock;
4158 	}
4159 	amdgpu_ring_alloc(ring, 32);
4160 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4161 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4162 				(5 << 8) |	/* dst: memory */
4163 				(1 << 16) |	/* count sel */
4164 				(1 << 20));	/* write confirm */
4165 	amdgpu_ring_write(ring, 0);
4166 	amdgpu_ring_write(ring, 0);
4167 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4168 				reg_val_offs * 4));
4169 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4170 				reg_val_offs * 4));
4171 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4172 	if (r)
4173 		goto failed_undo;
4174 
4175 	amdgpu_ring_commit(ring);
4176 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4177 
4178 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4179 
4180 	/* don't wait any longer in the gpu reset case because doing so may
4181 	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4182 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4183 	 * never return if we keep waiting in virt_kiq_rreg, which causes
4184 	 * gpu_recover() to hang there.
4185 	 *
4186 	 * also don't wait any longer when called from IRQ context
4187 	 */
4188 	if (r < 1 && (amdgpu_in_reset(adev)))
4189 		goto failed_kiq_read;
4190 
4191 	might_sleep();
4192 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4193 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4194 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4195 	}
4196 
4197 	if (cnt > MAX_KIQ_REG_TRY)
4198 		goto failed_kiq_read;
4199 
4200 	mb();
4201 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4202 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4203 	amdgpu_device_wb_free(adev, reg_val_offs);
4204 	return value;
4205 
4206 failed_undo:
4207 	amdgpu_ring_undo(ring);
4208 failed_unlock:
4209 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4210 failed_kiq_read:
4211 	if (reg_val_offs)
4212 		amdgpu_device_wb_free(adev, reg_val_offs);
4213 	pr_err("failed to read gpu clock\n");
4214 	return ~0;
4215 }
4216 
4217 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4218 {
4219 	uint64_t clock, clock_lo, clock_hi, hi_check;
4220 
4221 	switch (adev->ip_versions[GC_HWIP][0]) {
4222 	case IP_VERSION(9, 3, 0):
4223 		preempt_disable();
4224 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4225 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4226 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4227 		/* The SMUIO TSC clock frequency is 100MHz, so the 32-bit low word
4228 		 * carries over roughly every 42 seconds.
4229 		 */
4230 		if (hi_check != clock_hi) {
4231 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4232 			clock_hi = hi_check;
4233 		}
4234 		preempt_enable();
4235 		clock = clock_lo | (clock_hi << 32ULL);
4236 		break;
4237 	default:
4238 		amdgpu_gfx_off_ctrl(adev, false);
4239 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4240 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4241 			clock = gfx_v9_0_kiq_read_clock(adev);
4242 		} else {
4243 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4244 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4245 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4246 		}
4247 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4248 		amdgpu_gfx_off_ctrl(adev, true);
4249 		break;
4250 	}
4251 	return clock;
4252 }
4253 
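/*
 * Emit the register writes that switch the GDS, GWS and OA allocations
 * for a vmid. The OA value is a contiguous bitmask of oa_size bits
 * starting at oa_base: (1 << (oa_size + oa_base)) - (1 << oa_base).
 */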
4254 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4255 					  uint32_t vmid,
4256 					  uint32_t gds_base, uint32_t gds_size,
4257 					  uint32_t gws_base, uint32_t gws_size,
4258 					  uint32_t oa_base, uint32_t oa_size)
4259 {
4260 	struct amdgpu_device *adev = ring->adev;
4261 
4262 	/* GDS Base */
4263 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4264 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4265 				   gds_base);
4266 
4267 	/* GDS Size */
4268 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4269 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4270 				   gds_size);
4271 
4272 	/* GWS */
4273 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4274 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4275 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4276 
4277 	/* OA */
4278 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4279 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4280 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4281 }
4282 
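/*
 * Hand-assembled GFX9 compute shader binaries (raw instruction dwords)
 * used to initialize VGPR/SGPR state; consumed by the GPR init paths
 * elsewhere in this file.
 */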
4283 static const u32 vgpr_init_compute_shader[] =
4284 {
4285 	0xb07c0000, 0xbe8000ff,
4286 	0x000000f8, 0xbf110800,
4287 	0x7e000280, 0x7e020280,
4288 	0x7e040280, 0x7e060280,
4289 	0x7e080280, 0x7e0a0280,
4290 	0x7e0c0280, 0x7e0e0280,
4291 	0x80808800, 0xbe803200,
4292 	0xbf84fff5, 0xbf9c0000,
4293 	0xd28c0001, 0x0001007f,
4294 	0xd28d0001, 0x0002027e,
4295 	0x10020288, 0xb8810904,
4296 	0xb7814000, 0xd1196a01,
4297 	0x00000301, 0xbe800087,
4298 	0xbefc00c1, 0xd89c4000,
4299 	0x00020201, 0xd89cc080,
4300 	0x00040401, 0x320202ff,
4301 	0x00000800, 0x80808100,
4302 	0xbf84fff8, 0x7e020280,
4303 	0xbf810000, 0x00000000,
4304 };
4305 
4306 static const u32 sgpr_init_compute_shader[] =
4307 {
4308 	0xb07c0000, 0xbe8000ff,
4309 	0x0000005f, 0xbee50080,
4310 	0xbe812c65, 0xbe822c65,
4311 	0xbe832c65, 0xbe842c65,
4312 	0xbe852c65, 0xb77c0005,
4313 	0x80808500, 0xbf84fff8,
4314 	0xbe800080, 0xbf810000,
4315 };
4316 
4317 static const u32 vgpr_init_compute_shader_arcturus[] = {
4318 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4319 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4320 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4321 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4322 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4323 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4324 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4325 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4326 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4327 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4328 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4329 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4330 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4331 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4332 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4333 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4334 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4335 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4336 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4337 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4338 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4339 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4340 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4341 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4342 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4343 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4344 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4345 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4346 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4347 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4348 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4349 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4350 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4351 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4352 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4353 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4354 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4355 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4356 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4357 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4358 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4359 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4360 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4361 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4362 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4363 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4364 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4365 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4366 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4367 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4368 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4369 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4370 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4371 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4372 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4373 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4374 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4375 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4376 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4377 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4378 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4379 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4380 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4381 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4382 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4383 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4384 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4385 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4386 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4387 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4388 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4389 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4390 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4391 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4392 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4393 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4394 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4395 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4396 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4397 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4398 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4399 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4400 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4401 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4402 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4403 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4404 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4405 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4406 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4407 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4408 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4409 	0xbf84fff8, 0xbf810000,
4410 };
4411 
4412 /* When the register arrays below are changed, please update gpr_reg_size
4413   and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4414   so that all gfx9 ASICs are covered */
4415 static const struct soc15_reg_entry vgpr_init_regs[] = {
4416    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4430 };
4431 
4432 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4433    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4447 };
4448 
4449 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4464 };
4465 
4466 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4481 };
4482 
4483 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4484    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4485    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4486    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4487    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4488    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4489    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4490    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4498    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4499    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4500    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4501    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4502    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4503    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4504    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4507    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4508    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4510    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4511    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4512    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4513    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4514    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4515    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4516    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4517 };
4518 
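/*
 * Clear the entire GDS with a CP DMA_DATA write and wait for the ring to
 * drain so that GDS starts from a known state; only run when GFX RAS is
 * enabled.
 */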
4519 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4520 {
4521 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4522 	int i, r;
4523 
4524 	/* only supported when RAS is enabled */
4525 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4526 		return 0;
4527 
4528 	r = amdgpu_ring_alloc(ring, 7);
4529 	if (r) {
4530 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4531 			ring->name, r);
4532 		return r;
4533 	}
4534 
4535 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4536 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4537 
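	/* CP DMA: source is the inline zero data, destination is GDS,
	 * length is gds_size bytes; RAW_WAIT makes the CP wait for completion.
	 */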
4538 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4539 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4540 				PACKET3_DMA_DATA_DST_SEL(1) |
4541 				PACKET3_DMA_DATA_SRC_SEL(2) |
4542 				PACKET3_DMA_DATA_ENGINE(0)));
4543 	amdgpu_ring_write(ring, 0);
4544 	amdgpu_ring_write(ring, 0);
4545 	amdgpu_ring_write(ring, 0);
4546 	amdgpu_ring_write(ring, 0);
4547 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4548 				adev->gds.gds_size);
4549 
4550 	amdgpu_ring_commit(ring);
4551 
4552 	for (i = 0; i < adev->usec_timeout; i++) {
4553 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4554 			break;
4555 		udelay(1);
4556 	}
4557 
4558 	if (i >= adev->usec_timeout)
4559 		r = -ETIMEDOUT;
4560 
4561 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4562 
4563 	return r;
4564 }
4565 
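/*
 * Dispatch the VGPR/SGPR init shaders above across all CUs so the GPR
 * register files are written once; only run when GFX RAS is enabled.
 */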
4566 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4567 {
4568 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4569 	struct amdgpu_ib ib;
4570 	struct dma_fence *f = NULL;
4571 	int r, i;
4572 	unsigned total_size, vgpr_offset, sgpr_offset;
4573 	u64 gpu_addr;
4574 
4575 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4576 						adev->gfx.config.max_cu_per_sh *
4577 						adev->gfx.config.max_sh_per_se;
4578 	int sgpr_work_group_size = 5;
4579 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4580 	int vgpr_init_shader_size;
4581 	const u32 *vgpr_init_shader_ptr;
4582 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4583 
4584 	/* only supported when RAS is enabled */
4585 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4586 		return 0;
4587 
4588 	/* bail if the compute ring is not ready */
4589 	if (!ring->sched.ready)
4590 		return 0;
4591 
4592 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4593 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4594 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4595 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4596 	} else {
4597 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4598 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4599 		vgpr_init_regs_ptr = vgpr_init_regs;
4600 	}
4601 
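	/*
	 * Each dispatch needs: 3 dwords per SET_SH_REG of the init registers,
	 * 4 dwords for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and
	 * 2 for the CS partial flush, times 4 bytes per dword.
	 */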
4602 	total_size =
4603 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4604 	total_size +=
4605 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4606 	total_size +=
4607 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4608 	total_size = ALIGN(total_size, 256);
4609 	vgpr_offset = total_size;
4610 	total_size += ALIGN(vgpr_init_shader_size, 256);
4611 	sgpr_offset = total_size;
4612 	total_size += sizeof(sgpr_init_compute_shader);
4613 
4614 	/* allocate an indirect buffer to put the commands in */
4615 	memset(&ib, 0, sizeof(ib));
4616 	r = amdgpu_ib_get(adev, NULL, total_size,
4617 					AMDGPU_IB_POOL_DIRECT, &ib);
4618 	if (r) {
4619 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4620 		return r;
4621 	}
4622 
4623 	/* load the compute shaders */
4624 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4625 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4626 
4627 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4628 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4629 
4630 	/* init the ib length to 0 */
4631 	ib.length_dw = 0;
4632 
4633 	/* VGPR */
4634 	/* write the register state for the compute dispatch */
4635 	for (i = 0; i < gpr_reg_size; i++) {
4636 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4637 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4638 								- PACKET3_SET_SH_REG_START;
4639 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4640 	}
4641 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4642 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4643 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4644 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4645 							- PACKET3_SET_SH_REG_START;
4646 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4647 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4648 
4649 	/* write dispatch packet */
4650 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4651 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4652 	ib.ptr[ib.length_dw++] = 1; /* y */
4653 	ib.ptr[ib.length_dw++] = 1; /* z */
4654 	ib.ptr[ib.length_dw++] =
4655 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4656 
4657 	/* write CS partial flush packet */
4658 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4659 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4660 
4661 	/* SGPR1 */
4662 	/* write the register state for the compute dispatch */
4663 	for (i = 0; i < gpr_reg_size; i++) {
4664 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4665 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4666 								- PACKET3_SET_SH_REG_START;
4667 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4668 	}
4669 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4670 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4671 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4672 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4673 							- PACKET3_SET_SH_REG_START;
4674 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4675 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4676 
4677 	/* write dispatch packet */
4678 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4679 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4680 	ib.ptr[ib.length_dw++] = 1; /* y */
4681 	ib.ptr[ib.length_dw++] = 1; /* z */
4682 	ib.ptr[ib.length_dw++] =
4683 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4684 
4685 	/* write CS partial flush packet */
4686 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4687 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4688 
4689 	/* SGPR2 */
4690 	/* write the register state for the compute dispatch */
4691 	for (i = 0; i < gpr_reg_size; i++) {
4692 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4693 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4694 								- PACKET3_SET_SH_REG_START;
4695 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4696 	}
4697 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4698 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4699 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4700 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4701 							- PACKET3_SET_SH_REG_START;
4702 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4703 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4704 
4705 	/* write dispatch packet */
4706 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4707 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4708 	ib.ptr[ib.length_dw++] = 1; /* y */
4709 	ib.ptr[ib.length_dw++] = 1; /* z */
4710 	ib.ptr[ib.length_dw++] =
4711 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4712 
4713 	/* write CS partial flush packet */
4714 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4715 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4716 
4717 	/* schedule the ib on the ring */
4718 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4719 	if (r) {
4720 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4721 		goto fail;
4722 	}
4723 
4724 	/* wait for the GPU to finish processing the IB */
4725 	r = dma_fence_wait(f, false);
4726 	if (r) {
4727 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4728 		goto fail;
4729 	}
4730 
4731 fail:
4732 	amdgpu_ib_free(adev, &ib, NULL);
4733 	dma_fence_put(f);
4734 
4735 	return r;
4736 }
4737 
4738 static int gfx_v9_0_early_init(void *handle)
4739 {
4740 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4741 
4742 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4743 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4744 		adev->gfx.num_gfx_rings = 0;
4745 	else
4746 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4747 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4748 					  AMDGPU_MAX_COMPUTE_RINGS);
4749 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4750 	gfx_v9_0_set_ring_funcs(adev);
4751 	gfx_v9_0_set_irq_funcs(adev);
4752 	gfx_v9_0_set_gds_init(adev);
4753 	gfx_v9_0_set_rlc_funcs(adev);
4754 
4755 	/* init rlcg reg access ctrl */
4756 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4757 
4758 	return 0;
4759 }
4760 
4761 static int gfx_v9_0_ecc_late_init(void *handle)
4762 {
4763 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4764 	int r;
4765 
4766 	/*
4767 	 * Temporary workaround: on several cards the CP firmware fails to
4768 	 * update the read pointer while CPDMA writes the clearing operation
4769 	 * to GDS during the suspend/resume sequence. So just limit this
4770 	 * operation to the cold boot sequence.
4771 	 */
4772 	if ((!adev->in_suspend) &&
4773 	    (adev->gds.gds_size)) {
4774 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4775 		if (r)
4776 			return r;
4777 	}
4778 
4779 	/* requires IBs so do in late init after IB pool is initialized */
4780 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4781 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4782 	else
4783 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4784 
4785 	if (r)
4786 		return r;
4787 
4788 	if (adev->gfx.ras &&
4789 	    adev->gfx.ras->enable_watchdog_timer)
4790 		adev->gfx.ras->enable_watchdog_timer(adev);
4791 
4792 	return 0;
4793 }
4794 
4795 static int gfx_v9_0_late_init(void *handle)
4796 {
4797 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4798 	int r;
4799 
4800 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4801 	if (r)
4802 		return r;
4803 
4804 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4805 	if (r)
4806 		return r;
4807 
4808 	r = gfx_v9_0_ecc_late_init(handle);
4809 	if (r)
4810 		return r;
4811 
4812 	return 0;
4813 }
4814 
4815 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4816 {
4817 	uint32_t rlc_setting;
4818 
4819 	/* if RLC is not enabled, do nothing */
4820 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4821 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4822 		return false;
4823 
4824 	return true;
4825 }
4826 
4827 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4828 {
4829 	uint32_t data;
4830 	unsigned i;
4831 
4832 	data = RLC_SAFE_MODE__CMD_MASK;
4833 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4834 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4835 
4836 	/* wait for RLC_SAFE_MODE */
4837 	for (i = 0; i < adev->usec_timeout; i++) {
4838 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4839 			break;
4840 		udelay(1);
4841 	}
4842 }
4843 
4844 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4845 {
4846 	uint32_t data;
4847 
4848 	data = RLC_SAFE_MODE__CMD_MASK;
4849 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4850 }
4851 
4852 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4853 						bool enable)
4854 {
4855 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4856 
4857 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4858 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4859 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4860 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4861 	} else {
4862 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4863 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4864 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4865 	}
4866 
4867 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4868 }
4869 
4870 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4871 						bool enable)
4872 {
4873 	/* TODO: double check whether this needs to run under RLC safe mode */
4874 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4875 
4876 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4877 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4878 	else
4879 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4880 
4881 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4882 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4883 	else
4884 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4885 
4886 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4887 }
4888 
4889 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4890 						      bool enable)
4891 {
4892 	uint32_t data, def;
4893 
4894 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4895 
4896 	/* MGCG is disabled by HW by default */
4897 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4898 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4899 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4900 
4901 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4902 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4903 
4904 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4905 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4906 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4907 
4908 		/* only for Vega10 & Raven1 */
4909 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4910 
4911 		if (def != data)
4912 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4913 
4914 		/* MGLS is a global flag to control all MGLS in GFX */
4915 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4916 			/* 2 - RLC memory Light sleep */
4917 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4918 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4919 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4920 				if (def != data)
4921 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4922 			}
4923 			/* 3 - CP memory Light sleep */
4924 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4925 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4926 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4927 				if (def != data)
4928 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4929 			}
4930 		}
4931 	} else {
4932 		/* 1 - MGCG_OVERRIDE */
4933 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4934 
4935 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4936 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4937 
4938 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4939 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4940 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4941 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4942 
4943 		if (def != data)
4944 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4945 
4946 		/* 2 - disable MGLS in RLC */
4947 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4948 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4949 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4950 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4951 		}
4952 
4953 		/* 3 - disable MGLS in CP */
4954 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4955 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4956 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4957 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4958 		}
4959 	}
4960 
4961 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4962 }
4963 
4964 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4965 					   bool enable)
4966 {
4967 	uint32_t data, def;
4968 
4969 	if (!adev->gfx.num_gfx_rings)
4970 		return;
4971 
4972 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4973 
4974 	/* Enable 3D CGCG/CGLS */
4975 	if (enable) {
4976 		/* write cmd to clear cgcg/cgls ov */
4977 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4978 		/* unset CGCG override */
4979 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4980 		/* update CGCG and CGLS override bits */
4981 		if (def != data)
4982 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4983 
4984 		/* enable 3Dcgcg FSM(0x0000363f) */
4985 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4986 
4987 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4988 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4989 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4990 		else
4991 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4992 
4993 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4994 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4995 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4996 		if (def != data)
4997 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4998 
4999 		/* set IDLE_POLL_COUNT(0x00900100) */
5000 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5001 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5002 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5003 		if (def != data)
5004 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5005 	} else {
5006 		/* Disable CGCG/CGLS */
5007 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5008 		/* disable cgcg, cgls should be disabled */
5009 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5010 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5011 		/* disable cgcg and cgls in FSM */
5012 		if (def != data)
5013 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5014 	}
5015 
5016 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5017 }
5018 
5019 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5020 						      bool enable)
5021 {
5022 	uint32_t def, data;
5023 
5024 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5025 
5026 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5027 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5028 		/* unset CGCG override */
5029 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5030 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5031 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5032 		else
5033 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5034 		/* update CGCG and CGLS override bits */
5035 		if (def != data)
5036 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5037 
5038 		/* enable cgcg FSM(0x0000363F) */
5039 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5040 
5041 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5042 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5043 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5044 		else
5045 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5046 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5047 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5048 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5049 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5050 		if (def != data)
5051 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5052 
5053 		/* set IDLE_POLL_COUNT(0x00900100) */
5054 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5055 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5056 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5057 		if (def != data)
5058 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5059 	} else {
5060 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5061 		/* reset CGCG/CGLS bits */
5062 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5063 		/* disable cgcg and cgls in FSM */
5064 		if (def != data)
5065 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5066 	}
5067 
5068 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5069 }
5070 
5071 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5072 					    bool enable)
5073 {
5074 	if (enable) {
5075 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5076 		 * ===  MGCG + MGLS ===
5077 		 */
5078 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5079 		/* ===  CGCG /CGLS for GFX 3D Only === */
5080 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5081 		/* ===  CGCG + CGLS === */
5082 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5083 	} else {
5084 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5085 		 * ===  CGCG + CGLS ===
5086 		 */
5087 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5088 		/* ===  CGCG /CGLS for GFX 3D Only === */
5089 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5090 		/* ===  MGCG + MGLS === */
5091 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5092 	}
5093 	return 0;
5094 }
5095 
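/*
 * Select the VMID used by the RLC streaming performance monitor (SPM);
 * GFXOFF is disabled around the RLC_SPM_MC_CNTL access.
 */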
5096 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5097 {
5098 	u32 reg, data;
5099 
5100 	amdgpu_gfx_off_ctrl(adev, false);
5101 
5102 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5103 	if (amdgpu_sriov_is_pp_one_vf(adev))
5104 		data = RREG32_NO_KIQ(reg);
5105 	else
5106 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5107 
5108 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5109 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5110 
5111 	if (amdgpu_sriov_is_pp_one_vf(adev))
5112 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5113 	else
5114 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5115 
5116 	amdgpu_gfx_off_ctrl(adev, true);
5117 }
5118 
5119 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5120 					uint32_t offset,
5121 					struct soc15_reg_rlcg *entries, int arr_size)
5122 {
5123 	int i;
5124 	uint32_t reg;
5125 
5126 	if (!entries)
5127 		return false;
5128 
5129 	for (i = 0; i < arr_size; i++) {
5130 		const struct soc15_reg_rlcg *entry;
5131 
5132 		entry = &entries[i];
5133 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5134 		if (offset == reg)
5135 			return true;
5136 	}
5137 
5138 	return false;
5139 }
5140 
5141 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5142 {
5143 	return gfx_v9_0_check_rlcg_range(adev, offset,
5144 					(void *)rlcg_access_gc_9_0,
5145 					ARRAY_SIZE(rlcg_access_gc_9_0));
5146 }
5147 
5148 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5149 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5150 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5151 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5152 	.init = gfx_v9_0_rlc_init,
5153 	.get_csb_size = gfx_v9_0_get_csb_size,
5154 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5155 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5156 	.resume = gfx_v9_0_rlc_resume,
5157 	.stop = gfx_v9_0_rlc_stop,
5158 	.reset = gfx_v9_0_rlc_reset,
5159 	.start = gfx_v9_0_rlc_start,
5160 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5161 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5162 };
5163 
5164 static int gfx_v9_0_set_powergating_state(void *handle,
5165 					  enum amd_powergating_state state)
5166 {
5167 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5168 	bool enable = (state == AMD_PG_STATE_GATE);
5169 
5170 	switch (adev->ip_versions[GC_HWIP][0]) {
5171 	case IP_VERSION(9, 2, 2):
5172 	case IP_VERSION(9, 1, 0):
5173 	case IP_VERSION(9, 3, 0):
5174 		if (!enable)
5175 			amdgpu_gfx_off_ctrl(adev, false);
5176 
5177 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5178 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5179 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5180 		} else {
5181 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5182 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5183 		}
5184 
5185 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5186 			gfx_v9_0_enable_cp_power_gating(adev, true);
5187 		else
5188 			gfx_v9_0_enable_cp_power_gating(adev, false);
5189 
5190 		/* update gfx cgpg state */
5191 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5192 
5193 		/* update mgcg state */
5194 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5195 
5196 		if (enable)
5197 			amdgpu_gfx_off_ctrl(adev, true);
5198 		break;
5199 	case IP_VERSION(9, 2, 1):
5200 		amdgpu_gfx_off_ctrl(adev, enable);
5201 		break;
5202 	default:
5203 		break;
5204 	}
5205 
5206 	return 0;
5207 }
5208 
5209 static int gfx_v9_0_set_clockgating_state(void *handle,
5210 					  enum amd_clockgating_state state)
5211 {
5212 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5213 
5214 	if (amdgpu_sriov_vf(adev))
5215 		return 0;
5216 
5217 	switch (adev->ip_versions[GC_HWIP][0]) {
5218 	case IP_VERSION(9, 0, 1):
5219 	case IP_VERSION(9, 2, 1):
5220 	case IP_VERSION(9, 4, 0):
5221 	case IP_VERSION(9, 2, 2):
5222 	case IP_VERSION(9, 1, 0):
5223 	case IP_VERSION(9, 4, 1):
5224 	case IP_VERSION(9, 3, 0):
5225 	case IP_VERSION(9, 4, 2):
5226 		gfx_v9_0_update_gfx_clock_gating(adev,
5227 						 state == AMD_CG_STATE_GATE);
5228 		break;
5229 	default:
5230 		break;
5231 	}
5232 	return 0;
5233 }
5234 
5235 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5236 {
5237 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5238 	int data;
5239 
5240 	if (amdgpu_sriov_vf(adev))
5241 		*flags = 0;
5242 
5243 	/* AMD_CG_SUPPORT_GFX_MGCG */
5244 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5245 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5246 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5247 
5248 	/* AMD_CG_SUPPORT_GFX_CGCG */
5249 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5250 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5251 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5252 
5253 	/* AMD_CG_SUPPORT_GFX_CGLS */
5254 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5255 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5256 
5257 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5258 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5259 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5260 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5261 
5262 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5263 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5264 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5265 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5266 
5267 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5268 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5269 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5270 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5271 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5272 
5273 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5274 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5275 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5276 	}
5277 }
5278 
5279 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5280 {
5281 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5282 }
5283 
5284 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5285 {
5286 	struct amdgpu_device *adev = ring->adev;
5287 	u64 wptr;
5288 
5289 	/* XXX check if swapping is necessary on BE */
5290 	if (ring->use_doorbell) {
5291 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5292 	} else {
5293 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5294 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5295 	}
5296 
5297 	return wptr;
5298 }
5299 
5300 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5301 {
5302 	struct amdgpu_device *adev = ring->adev;
5303 
5304 	if (ring->use_doorbell) {
5305 		/* XXX check if swapping is necessary on BE */
5306 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5307 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5308 	} else {
5309 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5310 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5311 	}
5312 }
5313 
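/*
 * Flush HDP by kicking the NBIO flush request register and waiting on the
 * matching done bit with WAIT_REG_MEM (engine 1/PFP for gfx rings,
 * engine 0 for compute rings).
 */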
5314 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5315 {
5316 	struct amdgpu_device *adev = ring->adev;
5317 	u32 ref_and_mask, reg_mem_engine;
5318 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5319 
5320 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5321 		switch (ring->me) {
5322 		case 1:
5323 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5324 			break;
5325 		case 2:
5326 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5327 			break;
5328 		default:
5329 			return;
5330 		}
5331 		reg_mem_engine = 0;
5332 	} else {
5333 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5334 		reg_mem_engine = 1; /* pfp */
5335 	}
5336 
5337 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5338 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5339 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5340 			      ref_and_mask, ref_and_mask, 0x20);
5341 }
5342 
5343 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5344 					struct amdgpu_job *job,
5345 					struct amdgpu_ib *ib,
5346 					uint32_t flags)
5347 {
5348 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5349 	u32 header, control = 0;
5350 
5351 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5352 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5353 	else
5354 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5355 
5356 	control |= ib->length_dw | (vmid << 24);
5357 
5358 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5359 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5360 
5361 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5362 			gfx_v9_0_ring_emit_de_meta(ring);
5363 	}
5364 
5365 	amdgpu_ring_write(ring, header);
5366 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5367 	amdgpu_ring_write(ring,
5368 #ifdef __BIG_ENDIAN
5369 		(2 << 0) |
5370 #endif
5371 		lower_32_bits(ib->gpu_addr));
5372 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5373 	amdgpu_ring_write(ring, control);
5374 }
5375 
5376 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5377 					  struct amdgpu_job *job,
5378 					  struct amdgpu_ib *ib,
5379 					  uint32_t flags)
5380 {
5381 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5382 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5383 
5384 	/* Currently there is a high probability of a wave ID mismatch between
5385 	 * ME and GDS, leading to a hw deadlock, because ME generates
5386 	 * different wave IDs than the GDS expects. This situation happens
5387 	 * randomly when at least 5 compute pipes use GDS ordered append.
5388 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5389 	 * Those are probably bugs somewhere else in the kernel driver.
5390 	 *
5391 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5392 	 * GDS to 0 for this ring (me/pipe).
5393 	 */
5394 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5395 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5396 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5397 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5398 	}
5399 
5400 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5401 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5402 	amdgpu_ring_write(ring,
5403 #ifdef __BIG_ENDIAN
5404 				(2 << 0) |
5405 #endif
5406 				lower_32_bits(ib->gpu_addr));
5407 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5408 	amdgpu_ring_write(ring, control);
5409 }
5410 
5411 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5412 				     u64 seq, unsigned flags)
5413 {
5414 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5415 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5416 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5417 
5418 	/* RELEASE_MEM - flush caches, send int */
5419 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5420 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5421 					       EOP_TC_NC_ACTION_EN) :
5422 					      (EOP_TCL1_ACTION_EN |
5423 					       EOP_TC_ACTION_EN |
5424 					       EOP_TC_WB_ACTION_EN |
5425 					       EOP_TC_MD_ACTION_EN)) |
5426 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5427 				 EVENT_INDEX(5)));
5428 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5429 
5430 	/*
5431 	 * the address should be Qword aligned for a 64bit write and Dword
5432 	 * aligned when only the low 32 bits are written (data high discarded)
5433 	 */
5434 	if (write64bit)
5435 		BUG_ON(addr & 0x7);
5436 	else
5437 		BUG_ON(addr & 0x3);
5438 	amdgpu_ring_write(ring, lower_32_bits(addr));
5439 	amdgpu_ring_write(ring, upper_32_bits(addr));
5440 	amdgpu_ring_write(ring, lower_32_bits(seq));
5441 	amdgpu_ring_write(ring, upper_32_bits(seq));
5442 	amdgpu_ring_write(ring, 0);
5443 }
5444 
5445 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5446 {
5447 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5448 	uint32_t seq = ring->fence_drv.sync_seq;
5449 	uint64_t addr = ring->fence_drv.gpu_addr;
5450 
5451 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5452 			      lower_32_bits(addr), upper_32_bits(addr),
5453 			      seq, 0xffffffff, 4);
5454 }
5455 
5456 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5457 					unsigned vmid, uint64_t pd_addr)
5458 {
5459 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5460 
5461 	/* compute doesn't have PFP */
5462 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5463 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5464 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5465 		amdgpu_ring_write(ring, 0x0);
5466 	}
5467 }
5468 
5469 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5470 {
5471 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5472 }
5473 
5474 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5475 {
5476 	u64 wptr;
5477 
5478 	/* XXX check if swapping is necessary on BE */
5479 	if (ring->use_doorbell)
5480 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5481 	else
5482 		BUG();
5483 	return wptr;
5484 }
5485 
5486 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5487 {
5488 	struct amdgpu_device *adev = ring->adev;
5489 
5490 	/* XXX check if swapping is necessary on BE */
5491 	if (ring->use_doorbell) {
5492 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5493 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5494 	} else {
5495 		BUG(); /* only DOORBELL method supported on gfx9 now */
5496 	}
5497 }
5498 
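/*
 * KIQ fences: write the 32-bit sequence number with WRITE_DATA and, if
 * requested, poke CPC_INT_STATUS to raise the interrupt.
 */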
5499 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5500 					 u64 seq, unsigned int flags)
5501 {
5502 	struct amdgpu_device *adev = ring->adev;
5503 
5504 	/* we only allocate 32 bits for each fence sequence writeback address */
5505 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5506 
5507 	/* write fence seq to the "addr" */
5508 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5509 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5510 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5511 	amdgpu_ring_write(ring, lower_32_bits(addr));
5512 	amdgpu_ring_write(ring, upper_32_bits(addr));
5513 	amdgpu_ring_write(ring, lower_32_bits(seq));
5514 
5515 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5516 		/* set register to trigger INT */
5517 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5518 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5519 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5520 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5521 		amdgpu_ring_write(ring, 0);
5522 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5523 	}
5524 }
5525 
5526 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5527 {
5528 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5529 	amdgpu_ring_write(ring, 0);
5530 }
5531 
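/*
 * Write a zeroed CE metadata payload into the context save area (CSA);
 * emitted under SR-IOV, see gfx_v9_ring_emit_cntxcntl().
 */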
5532 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5533 {
5534 	struct v9_ce_ib_state ce_payload = {0};
5535 	uint64_t csa_addr;
5536 	int cnt;
5537 
5538 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5539 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5540 
5541 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5542 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5543 				 WRITE_DATA_DST_SEL(8) |
5544 				 WR_CONFIRM) |
5545 				 WRITE_DATA_CACHE_POLICY(0));
5546 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5547 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5548 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5549 }
5550 
5551 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5552 {
5553 	struct v9_de_ib_state de_payload = {0};
5554 	uint64_t csa_addr, gds_addr;
5555 	int cnt;
5556 
5557 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5558 	gds_addr = csa_addr + 4096;
5559 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5560 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5561 
5562 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5563 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5564 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5565 				 WRITE_DATA_DST_SEL(8) |
5566 				 WR_CONFIRM) |
5567 				 WRITE_DATA_CACHE_POLICY(0));
5568 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5569 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5570 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5571 }
5572 
5573 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5574 				   bool secure)
5575 {
5576 	uint32_t v = secure ? FRAME_TMZ : 0;
5577 
5578 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5579 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5580 }
5581 
5582 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5583 {
5584 	uint32_t dw2 = 0;
5585 
5586 	if (amdgpu_sriov_vf(ring->adev))
5587 		gfx_v9_0_ring_emit_ce_meta(ring);
5588 
5589 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5590 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5591 		/* set load_global_config & load_global_uconfig */
5592 		dw2 |= 0x8001;
5593 		/* set load_cs_sh_regs */
5594 		dw2 |= 0x01000000;
5595 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5596 		dw2 |= 0x10002;
5597 
5598 		/* set load_ce_ram if a preamble is present */
5599 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5600 			dw2 |= 0x10000000;
5601 	} else {
5602 		/* still load_ce_ram if this is the first time the preamble is
5603 		 * presented, even though no context switch happens.
5604 		 */
5605 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5606 			dw2 |= 0x10000000;
5607 	}
5608 
5609 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5610 	amdgpu_ring_write(ring, dw2);
5611 	amdgpu_ring_write(ring, 0);
5612 }
5613 
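/*
 * Emit a COND_EXEC packet with a placeholder dword count (0x55aa55aa);
 * gfx_v9_0_ring_emit_patch_cond_exec() later patches in the real number
 * of dwords to skip when *cond_exe_gpu_addr == 0.
 */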
5614 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5615 {
5616 	unsigned ret;
5617 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5618 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5619 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5620 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5621 	ret = ring->wptr & ring->buf_mask;
5622 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5623 	return ret;
5624 }
5625 
5626 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5627 {
5628 	unsigned cur;
5629 	BUG_ON(offset > ring->buf_mask);
5630 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5631 
5632 	cur = (ring->wptr & ring->buf_mask) - 1;
5633 	if (likely(cur > offset))
5634 		ring->ring[offset] = cur - offset;
5635 	else
5636 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5637 }
5638 
5639 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5640 				    uint32_t reg_val_offs)
5641 {
5642 	struct amdgpu_device *adev = ring->adev;
5643 
5644 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5645 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5646 				(5 << 8) |	/* dst: memory */
5647 				(1 << 20));	/* write confirm */
5648 	amdgpu_ring_write(ring, reg);
5649 	amdgpu_ring_write(ring, 0);
5650 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5651 				reg_val_offs * 4));
5652 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5653 				reg_val_offs * 4));
5654 }
5655 
5656 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5657 				    uint32_t val)
5658 {
5659 	uint32_t cmd = 0;
5660 
5661 	switch (ring->funcs->type) {
5662 	case AMDGPU_RING_TYPE_GFX:
5663 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5664 		break;
5665 	case AMDGPU_RING_TYPE_KIQ:
5666 		cmd = (1 << 16); /* no inc addr */
5667 		break;
5668 	default:
5669 		cmd = WR_CONFIRM;
5670 		break;
5671 	}
5672 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5673 	amdgpu_ring_write(ring, cmd);
5674 	amdgpu_ring_write(ring, reg);
5675 	amdgpu_ring_write(ring, 0);
5676 	amdgpu_ring_write(ring, val);
5677 }
5678 
5679 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5680 					uint32_t val, uint32_t mask)
5681 {
5682 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5683 }
5684 
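/*
 * Use the fused write+wait WAIT_REG_MEM packet only when the ME/MEC
 * firmware supports it; otherwise fall back to the generic helper.
 */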
5685 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5686 						  uint32_t reg0, uint32_t reg1,
5687 						  uint32_t ref, uint32_t mask)
5688 {
5689 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5690 	struct amdgpu_device *adev = ring->adev;
5691 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5692 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5693 
5694 	if (fw_version_ok)
5695 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5696 				      ref, mask, 0x20);
5697 	else
5698 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5699 							   ref, mask);
5700 }
5701 
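/*
 * Soft recovery: issue an SQ_CMD targeting the waves of the given VMID,
 * attempting to unblock a hung ring without a full GPU reset.
 */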
5702 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5703 {
5704 	struct amdgpu_device *adev = ring->adev;
5705 	uint32_t value = 0;
5706 
5707 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5708 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5709 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5710 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5711 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5712 }
5713 
5714 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5715 						 enum amdgpu_interrupt_state state)
5716 {
5717 	switch (state) {
5718 	case AMDGPU_IRQ_STATE_DISABLE:
5719 	case AMDGPU_IRQ_STATE_ENABLE:
5720 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5721 			       TIME_STAMP_INT_ENABLE,
5722 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5723 		break;
5724 	default:
5725 		break;
5726 	}
5727 }
5728 
5729 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5730 						     int me, int pipe,
5731 						     enum amdgpu_interrupt_state state)
5732 {
5733 	u32 mec_int_cntl, mec_int_cntl_reg;
5734 
5735 	/*
5736 	 * amdgpu controls only the first MEC. That's why this function only
5737 	 * handles the setting of interrupts for this specific MEC. All other
5738 	 * pipes' interrupts are set by amdkfd.
5739 	 */
5740 
5741 	if (me == 1) {
5742 		switch (pipe) {
5743 		case 0:
5744 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5745 			break;
5746 		case 1:
5747 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5748 			break;
5749 		case 2:
5750 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5751 			break;
5752 		case 3:
5753 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5754 			break;
5755 		default:
5756 			DRM_DEBUG("invalid pipe %d\n", pipe);
5757 			return;
5758 		}
5759 	} else {
5760 		DRM_DEBUG("invalid me %d\n", me);
5761 		return;
5762 	}
5763 
5764 	switch (state) {
5765 	case AMDGPU_IRQ_STATE_DISABLE:
5766 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5767 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5768 					     TIME_STAMP_INT_ENABLE, 0);
5769 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5770 		break;
5771 	case AMDGPU_IRQ_STATE_ENABLE:
5772 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5773 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5774 					     TIME_STAMP_INT_ENABLE, 1);
5775 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5776 		break;
5777 	default:
5778 		break;
5779 	}
5780 }
5781 
5782 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5783 					     struct amdgpu_irq_src *source,
5784 					     unsigned type,
5785 					     enum amdgpu_interrupt_state state)
5786 {
5787 	switch (state) {
5788 	case AMDGPU_IRQ_STATE_DISABLE:
5789 	case AMDGPU_IRQ_STATE_ENABLE:
5790 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5791 			       PRIV_REG_INT_ENABLE,
5792 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5793 		break;
5794 	default:
5795 		break;
5796 	}
5797 
5798 	return 0;
5799 }
5800 
5801 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5802 					      struct amdgpu_irq_src *source,
5803 					      unsigned type,
5804 					      enum amdgpu_interrupt_state state)
5805 {
5806 	switch (state) {
5807 	case AMDGPU_IRQ_STATE_DISABLE:
5808 	case AMDGPU_IRQ_STATE_ENABLE:
5809 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5810 			       PRIV_INSTR_INT_ENABLE,
5811 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5812 		break;
5813 	default:
5814 		break;
5815 	}
5816 
5817 	return 0;
5818 }
5819 
5820 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5821 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5822 			CP_ECC_ERROR_INT_ENABLE, 1)
5823 
5824 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5825 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5826 			CP_ECC_ERROR_INT_ENABLE, 0)
5827 
5828 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5829 					      struct amdgpu_irq_src *source,
5830 					      unsigned type,
5831 					      enum amdgpu_interrupt_state state)
5832 {
5833 	switch (state) {
5834 	case AMDGPU_IRQ_STATE_DISABLE:
5835 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5836 				CP_ECC_ERROR_INT_ENABLE, 0);
5837 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5838 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5839 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5840 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5841 		break;
5842 
5843 	case AMDGPU_IRQ_STATE_ENABLE:
5844 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5845 				CP_ECC_ERROR_INT_ENABLE, 1);
5846 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5847 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5848 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5849 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5850 		break;
5851 	default:
5852 		break;
5853 	}
5854 
5855 	return 0;
5856 }
5857 
5858 
5859 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5860 					    struct amdgpu_irq_src *src,
5861 					    unsigned type,
5862 					    enum amdgpu_interrupt_state state)
5863 {
5864 	switch (type) {
5865 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5866 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5867 		break;
5868 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5869 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5870 		break;
5871 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5872 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5873 		break;
5874 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5875 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5876 		break;
5877 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5878 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5879 		break;
5880 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5881 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5882 		break;
5883 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5884 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5885 		break;
5886 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5887 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5888 		break;
5889 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5890 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5891 		break;
5892 	default:
5893 		break;
5894 	}
5895 	return 0;
5896 }
5897 
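/*
 * EOP (end of pipe) interrupt handler: decode me/pipe/queue from the IV
 * ring_id and run fence processing on the matching gfx or compute ring.
 */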
5898 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5899 			    struct amdgpu_irq_src *source,
5900 			    struct amdgpu_iv_entry *entry)
5901 {
5902 	int i;
5903 	u8 me_id, pipe_id, queue_id;
5904 	struct amdgpu_ring *ring;
5905 
5906 	DRM_DEBUG("IH: CP EOP\n");
5907 	me_id = (entry->ring_id & 0x0c) >> 2;
5908 	pipe_id = (entry->ring_id & 0x03) >> 0;
5909 	queue_id = (entry->ring_id & 0x70) >> 4;
5910 
5911 	switch (me_id) {
5912 	case 0:
5913 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5914 		break;
5915 	case 1:
5916 	case 2:
5917 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5918 			ring = &adev->gfx.compute_ring[i];
5919 			/* Per-queue interrupt is supported for MEC starting from VI.
5920 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5921 			 */
5922 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5923 				amdgpu_fence_process(ring);
5924 		}
5925 		break;
5926 	}
5927 	return 0;
5928 }
5929 
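/*
 * Route a CP fault to the scheduler of the ring that generated it so the
 * scheduler's timeout/recovery handling kicks in for the offending job.
 */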
5930 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5931 			   struct amdgpu_iv_entry *entry)
5932 {
5933 	u8 me_id, pipe_id, queue_id;
5934 	struct amdgpu_ring *ring;
5935 	int i;
5936 
5937 	me_id = (entry->ring_id & 0x0c) >> 2;
5938 	pipe_id = (entry->ring_id & 0x03) >> 0;
5939 	queue_id = (entry->ring_id & 0x70) >> 4;
5940 
5941 	switch (me_id) {
5942 	case 0:
5943 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5944 		break;
5945 	case 1:
5946 	case 2:
5947 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5948 			ring = &adev->gfx.compute_ring[i];
5949 			if (ring->me == me_id && ring->pipe == pipe_id &&
5950 			    ring->queue == queue_id)
5951 				drm_sched_fault(&ring->sched);
5952 		}
5953 		break;
5954 	}
5955 }
5956 
5957 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5958 				 struct amdgpu_irq_src *source,
5959 				 struct amdgpu_iv_entry *entry)
5960 {
5961 	DRM_ERROR("Illegal register access in command stream\n");
5962 	gfx_v9_0_fault(adev, entry);
5963 	return 0;
5964 }
5965 
5966 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5967 				  struct amdgpu_irq_src *source,
5968 				  struct amdgpu_iv_entry *entry)
5969 {
5970 	DRM_ERROR("Illegal instruction in command stream\n");
5971 	gfx_v9_0_fault(adev, entry);
5972 	return 0;
5973 }
5974 
5975 
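/*
 * Map of GFX EDC counter register fields to RAS sub-block names.  SEC/DED
 * are the correctable single-bit and uncorrectable double-bit error counts;
 * entries with a zero DED mask only expose a single-error-detect (SED)
 * count, which is accounted as a correctable error.
 */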
5976 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5977 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5978 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5979 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5980 	},
5981 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5982 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5983 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5984 	},
5985 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5986 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5987 	  0, 0
5988 	},
5989 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5990 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5991 	  0, 0
5992 	},
5993 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5994 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5995 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5996 	},
5997 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5998 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5999 	  0, 0
6000 	},
6001 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6002 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6003 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6004 	},
6005 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6006 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6007 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6008 	},
6009 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6010 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6011 	  0, 0
6012 	},
6013 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6014 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6015 	  0, 0
6016 	},
6017 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6018 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6019 	  0, 0
6020 	},
6021 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6022 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6023 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6024 	},
6025 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6026 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6027 	  0, 0
6028 	},
6029 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6030 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6031 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6032 	},
6033 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6034 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6035 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6036 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6037 	},
6038 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6039 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6040 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6041 	  0, 0
6042 	},
6043 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6044 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6045 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6046 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6047 	},
6048 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6049 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6050 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6051 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6052 	},
6053 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6054 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6055 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6056 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6057 	},
6058 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6059 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6060 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6061 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6062 	},
6063 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6064 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6065 	  0, 0
6066 	},
6067 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6068 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6069 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6070 	},
6071 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6072 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6073 	  0, 0
6074 	},
6075 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6076 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6077 	  0, 0
6078 	},
6079 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6080 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6081 	  0, 0
6082 	},
6083 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6084 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6085 	  0, 0
6086 	},
6087 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6088 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6089 	  0, 0
6090 	},
6091 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6092 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6096 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6097 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6098 	},
6099 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6100 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6101 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6102 	},
6103 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6104 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6105 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6106 	},
6107 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6108 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6109 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6110 	},
6111 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6112 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6113 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6114 	},
6115 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6116 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6117 	  0, 0
6118 	},
6119 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6120 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6121 	  0, 0
6122 	},
6123 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6124 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6125 	  0, 0
6126 	},
6127 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6128 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6129 	  0, 0
6130 	},
6131 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6132 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6133 	  0, 0
6134 	},
6135 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6136 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6137 	  0, 0
6138 	},
6139 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6140 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6141 	  0, 0
6142 	},
6143 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6144 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6145 	  0, 0
6146 	},
6147 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6148 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6149 	  0, 0
6150 	},
6151 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6152 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6153 	  0, 0
6154 	},
6155 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6156 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6157 	  0, 0
6158 	},
6159 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6160 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6161 	  0, 0
6162 	},
6163 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6164 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6165 	  0, 0
6166 	},
6167 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6168 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6169 	  0, 0
6170 	},
6171 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6172 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6173 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6174 	},
6175 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6176 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6177 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6178 	},
6179 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6180 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6181 	  0, 0
6182 	},
6183 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6184 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6185 	  0, 0
6186 	},
6187 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6188 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6189 	  0, 0
6190 	},
6191 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6192 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6193 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6194 	},
6195 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6196 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6197 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6198 	},
6199 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6200 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6201 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6202 	},
6203 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6204 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6205 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6206 	},
6207 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6208 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6209 	  0, 0
6210 	},
6211 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6212 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6213 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6214 	},
6215 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6216 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6217 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6218 	},
6219 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6220 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6221 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6222 	},
6223 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6224 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6225 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6226 	},
6227 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6228 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6229 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6230 	},
6231 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6232 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6233 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6234 	},
6235 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6236 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6237 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6238 	},
6239 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6240 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6241 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6242 	},
6243 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6244 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6245 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6246 	},
6247 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6248 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6249 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6250 	},
6251 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6252 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6253 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6254 	},
6255 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6256 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6257 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6258 	},
6259 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6260 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6261 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6262 	},
6263 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6264 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6265 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6266 	},
6267 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6268 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6270 	},
6271 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6272 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6274 	},
6275 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6276 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6278 	},
6279 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6280 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6281 	  0, 0
6282 	},
6283 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6284 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6285 	  0, 0
6286 	},
6287 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6288 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6289 	  0, 0
6290 	},
6291 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6292 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6293 	  0, 0
6294 	},
6295 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6296 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6297 	  0, 0
6298 	},
6299 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6300 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6301 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6302 	},
6303 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6304 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6305 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6306 	},
6307 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6308 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6309 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6310 	},
6311 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6312 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6313 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6314 	},
6315 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6316 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6317 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6318 	},
6319 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6320 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6321 	  0, 0
6322 	},
6323 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6324 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6325 	  0, 0
6326 	},
6327 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6328 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6329 	  0, 0
6330 	},
6331 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6332 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6333 	  0, 0
6334 	},
6335 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6336 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6337 	  0, 0
6338 	},
6339 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6340 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6341 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6342 	},
6343 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6344 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6345 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6346 	},
6347 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6348 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6350 	},
6351 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6352 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6354 	},
6355 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6356 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6358 	},
6359 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6360 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6361 	  0, 0
6362 	},
6363 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6364 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6365 	  0, 0
6366 	},
6367 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6368 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6369 	  0, 0
6370 	},
6371 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6372 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6373 	  0, 0
6374 	},
6375 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6376 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6377 	  0, 0
6378 	},
6379 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6380 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6381 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6382 	},
6383 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6384 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6385 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6386 	},
6387 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6388 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6389 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6390 	},
6391 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6392 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6393 	  0, 0
6394 	},
6395 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6396 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6397 	  0, 0
6398 	},
6399 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6400 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6401 	  0, 0
6402 	},
6403 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6404 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6405 	  0, 0
6406 	},
6407 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6408 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6409 	  0, 0
6410 	},
6411 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6412 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6413 	  0, 0
6414 	}
6415 };
6416 
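/*
 * RAS error injection: validate the requested sub-block and error type
 * against the ras_gfx_subblocks table, then hand the request to the PSP
 * RAS TA via psp_ras_trigger_error().
 */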
6417 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6418 				     void *inject_if)
6419 {
6420 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6421 	int ret;
6422 	struct ta_ras_trigger_error_input block_info = { 0 };
6423 
6424 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6425 		return -EINVAL;
6426 
6427 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6428 		return -EINVAL;
6429 
6430 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6431 		return -EPERM;
6432 
6433 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6434 	      info->head.type)) {
6435 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6436 			ras_gfx_subblocks[info->head.sub_block_index].name,
6437 			info->head.type);
6438 		return -EPERM;
6439 	}
6440 
6441 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6442 	      info->head.type)) {
6443 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6444 			ras_gfx_subblocks[info->head.sub_block_index].name,
6445 			info->head.type);
6446 		return -EPERM;
6447 	}
6448 
6449 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6450 	block_info.sub_block_index =
6451 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6452 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6453 	block_info.address = info->address;
6454 	block_info.value = info->value;
6455 
6456 	mutex_lock(&adev->grbm_idx_mutex);
6457 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6458 	mutex_unlock(&adev->grbm_idx_mutex);
6459 
6460 	return ret;
6461 }
6462 
6463 static const char *vml2_mems[] = {
6464 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6465 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6466 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6467 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6468 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6469 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6470 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6471 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6472 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6473 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6474 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6475 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6476 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6477 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6478 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6479 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6480 };
6481 
6482 static const char *vml2_walker_mems[] = {
6483 	"UTC_VML2_CACHE_PDE0_MEM0",
6484 	"UTC_VML2_CACHE_PDE0_MEM1",
6485 	"UTC_VML2_CACHE_PDE1_MEM0",
6486 	"UTC_VML2_CACHE_PDE1_MEM1",
6487 	"UTC_VML2_CACHE_PDE2_MEM0",
6488 	"UTC_VML2_CACHE_PDE2_MEM1",
6489 	"UTC_VML2_RDIF_LOG_FIFO",
6490 };
6491 
6492 static const char *atc_l2_cache_2m_mems[] = {
6493 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6494 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6495 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6496 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6497 };
6498 
6499 static const char *atc_l2_cache_4k_mems[] = {
6500 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6501 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6502 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6503 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6504 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6505 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6506 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6507 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6508 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6509 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6510 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6511 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6512 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6513 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6514 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6515 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6516 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6517 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6518 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6519 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6520 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6521 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6522 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6523 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6524 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6525 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6526 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6527 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6528 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6529 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6530 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6531 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6532 };
6533 
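/*
 * Collect EDC status for the UTC memories (VML2, VML2 walker, ATC L2 2M/4K
 * caches).  Each bank is selected through its *_INDEX register, its counter
 * register is read, and the SEC/DED counts are accumulated as correctable /
 * uncorrectable errors in @err_data.
 */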
6534 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6535 					 struct ras_err_data *err_data)
6536 {
6537 	uint32_t i, data;
6538 	uint32_t sec_count, ded_count;
6539 
6540 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6541 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6542 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6543 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6544 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6545 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6546 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6547 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6548 
6549 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6550 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6551 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6552 
6553 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6554 		if (sec_count) {
6555 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6556 				"SEC %d\n", i, vml2_mems[i], sec_count);
6557 			err_data->ce_count += sec_count;
6558 		}
6559 
6560 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6561 		if (ded_count) {
6562 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6563 				"DED %d\n", i, vml2_mems[i], ded_count);
6564 			err_data->ue_count += ded_count;
6565 		}
6566 	}
6567 
6568 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6569 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6570 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6571 
6572 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6573 						SEC_COUNT);
6574 		if (sec_count) {
6575 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6576 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6577 			err_data->ce_count += sec_count;
6578 		}
6579 
6580 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6581 						DED_COUNT);
6582 		if (ded_count) {
6583 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6584 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6585 			err_data->ue_count += ded_count;
6586 		}
6587 	}
6588 
6589 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6590 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6591 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6592 
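		/* The counter fields are open-coded here: the SEC count sits
		 * in bits 14:13 of ATC_L2_CACHE_2M_EDC_CNT.
		 */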
6593 		sec_count = (data & 0x00006000L) >> 0xd;
6594 		if (sec_count) {
6595 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6596 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6597 				sec_count);
6598 			err_data->ce_count += sec_count;
6599 		}
6600 	}
6601 
6602 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6603 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6604 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6605 
6606 		sec_count = (data & 0x00006000L) >> 0xd;
6607 		if (sec_count) {
6608 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6609 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6610 				sec_count);
6611 			err_data->ce_count += sec_count;
6612 		}
6613 
6614 		ded_count = (data & 0x00018000L) >> 0xf;
6615 		if (ded_count) {
6616 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6618 				ded_count);
6619 			err_data->ue_count += ded_count;
6620 		}
6621 	}
6622 
6623 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6624 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6625 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6626 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6627 
6628 	return 0;
6629 }
6630 
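/*
 * Decode one raw EDC counter value: find the matching entries in
 * gfx_v9_0_ras_fields for this register, extract their SEC/DED fields and
 * add them to the running totals.
 */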
6631 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6632 	const struct soc15_reg_entry *reg,
6633 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6634 	uint32_t *sec_count, uint32_t *ded_count)
6635 {
6636 	uint32_t i;
6637 	uint32_t sec_cnt, ded_cnt;
6638 
6639 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6640 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6641 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6642 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6643 			continue;
6644 
6645 		sec_cnt = (value &
6646 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6647 				gfx_v9_0_ras_fields[i].sec_count_shift;
6648 		if (sec_cnt) {
6649 			dev_info(adev->dev, "GFX SubBlock %s, "
6650 				"Instance[%d][%d], SEC %d\n",
6651 				gfx_v9_0_ras_fields[i].name,
6652 				se_id, inst_id,
6653 				sec_cnt);
6654 			*sec_count += sec_cnt;
6655 		}
6656 
6657 		ded_cnt = (value &
6658 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6659 				gfx_v9_0_ras_fields[i].ded_count_shift;
6660 		if (ded_cnt) {
6661 			dev_info(adev->dev, "GFX SubBlock %s, "
6662 				"Instance[%d][%d], DED %d\n",
6663 				gfx_v9_0_ras_fields[i].name,
6664 				se_id, inst_id,
6665 				ded_cnt);
6666 			*ded_count += ded_cnt;
6667 		}
6668 	}
6669 
6670 	return 0;
6671 }
6672 
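/*
 * Clear all GFX EDC counters.  The counters are cleared on read, so every
 * SE/instance combination is selected and read back, and the indexed
 * UTC/ATC L2 counters are reset through their INDEX/CNT register pairs.
 */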
6673 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6674 {
6675 	int i, j, k;
6676 
6677 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6678 		return;
6679 
6680 	/* read back registers to clear the counters */
6681 	mutex_lock(&adev->grbm_idx_mutex);
6682 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6683 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6684 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6685 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6686 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6687 			}
6688 		}
6689 	}
6690 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6691 	mutex_unlock(&adev->grbm_idx_mutex);
6692 
6693 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6694 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6695 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6696 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6697 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6698 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6699 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6700 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6701 
6702 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6703 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6704 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6705 	}
6706 
6707 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6708 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6709 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6710 	}
6711 
6712 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6713 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6714 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6715 	}
6716 
6717 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6718 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6719 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6720 	}
6721 
6722 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6723 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6724 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6725 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6726 }
6727 
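/*
 * Query the total GFX RAS error counts: walk every EDC counter register
 * across all SEs and instances, convert the SEC/DED counts to CE/UE totals
 * in @ras_error_status, then append the UTC/ATC L2 EDC status.
 */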
6728 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6729 					  void *ras_error_status)
6730 {
6731 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6732 	uint32_t sec_count = 0, ded_count = 0;
6733 	uint32_t i, j, k;
6734 	uint32_t reg_value;
6735 
6736 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6737 		return;
6738 
6739 	err_data->ue_count = 0;
6740 	err_data->ce_count = 0;
6741 
6742 	mutex_lock(&adev->grbm_idx_mutex);
6743 
6744 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6745 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6746 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6747 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6748 				reg_value =
6749 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6750 				if (reg_value)
6751 					gfx_v9_0_ras_error_count(adev,
6752 						&gfx_v9_0_edc_counter_regs[i],
6753 						j, k, reg_value,
6754 						&sec_count, &ded_count);
6755 			}
6756 		}
6757 	}
6758 
6759 	err_data->ce_count += sec_count;
6760 	err_data->ue_count += ded_count;
6761 
6762 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6763 	mutex_unlock(&adev->grbm_idx_mutex);
6764 
6765 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6766 }
6767 
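/*
 * Emit an ACQUIRE_MEM packet covering the whole address range that flushes
 * and invalidates the shader instruction/scalar caches and the TC (L1/L2)
 * caches before subsequent work runs.
 */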
6768 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6769 {
6770 	const unsigned int cp_coher_cntl =
6771 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6772 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6773 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6774 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6775 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6776 
6777 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6778 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6779 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6780 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6781 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6782 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6783 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6784 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6785 }
6786 
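/*
 * Compute wave limiting: while a high-priority compute job is active, the
 * gfx pipe and the other compute pipes are throttled through the
 * SPI_WCL_PIPE_PERCENT_* registers, and the defaults are restored when the
 * limit is disabled.  gfx_v9_0_emit_wave_limit_cs() handles one CS pipe.
 */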
6787 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6788 					uint32_t pipe, bool enable)
6789 {
6790 	struct amdgpu_device *adev = ring->adev;
6791 	uint32_t val;
6792 	uint32_t wcl_cs_reg;
6793 
6794 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6795 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6796 
6797 	switch (pipe) {
6798 	case 0:
6799 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6800 		break;
6801 	case 1:
6802 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6803 		break;
6804 	case 2:
6805 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6806 		break;
6807 	case 3:
6808 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6809 		break;
6810 	default:
6811 		DRM_DEBUG("invalid pipe %d\n", pipe);
6812 		return;
6813 	}
6814 
6815 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6816 }
6817 
6818 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6819 {
6820 	struct amdgpu_device *adev = ring->adev;
6821 	uint32_t val;
6822 	int i;
6823 
6824 
6825 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6826 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6827 	 * limit the number of gfx waves. Writing 0x1f makes sure gfx only
6828 	 * gets around 25% of GPU resources.
6829 	 */
6830 	amdgpu_ring_emit_wreg(ring,
6831 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6832 			      val);
6833 
6834 	/* Restrict waves for normal/low priority compute queues as well
6835 	 * to get the best QoS for high priority compute jobs.
6836 	 *
6837 	 * amdgpu controls only the first ME (CS pipes 0-3).
6838 	 */
6839 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6840 		if (i != ring->pipe)
6841 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6842 
6843 	}
6844 }
6845 
6846 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6847 	.name = "gfx_v9_0",
6848 	.early_init = gfx_v9_0_early_init,
6849 	.late_init = gfx_v9_0_late_init,
6850 	.sw_init = gfx_v9_0_sw_init,
6851 	.sw_fini = gfx_v9_0_sw_fini,
6852 	.hw_init = gfx_v9_0_hw_init,
6853 	.hw_fini = gfx_v9_0_hw_fini,
6854 	.suspend = gfx_v9_0_suspend,
6855 	.resume = gfx_v9_0_resume,
6856 	.is_idle = gfx_v9_0_is_idle,
6857 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6858 	.soft_reset = gfx_v9_0_soft_reset,
6859 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6860 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6861 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6862 };
6863 
6864 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6865 	.type = AMDGPU_RING_TYPE_GFX,
6866 	.align_mask = 0xff,
6867 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6868 	.support_64bit_ptrs = true,
6869 	.secure_submission_supported = true,
6870 	.vmhub = AMDGPU_GFXHUB_0,
6871 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6872 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6873 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6874 	.emit_frame_size = /* 242 maximum in total, if 16 IBs */
6875 		5 +  /* COND_EXEC */
6876 		7 +  /* PIPELINE_SYNC */
6877 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6878 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6879 		2 + /* VM_FLUSH */
6880 		8 +  /* FENCE for VM_FLUSH */
6881 		20 + /* GDS switch */
6882 		4 + /* double SWITCH_BUFFER,
6883 		       the first COND_EXEC jumps to the place just
6884 		       prior to this double SWITCH_BUFFER */
6885 		5 + /* COND_EXEC */
6886 		7 +	 /*	HDP_flush */
6887 		4 +	 /*	VGT_flush */
6888 		14 + /*	CE_META */
6889 		31 + /*	DE_META */
6890 		3 + /* CNTX_CTRL */
6891 		5 + /* HDP_INVL */
6892 		8 + 8 + /* FENCE x2 */
6893 		2 + /* SWITCH_BUFFER */
6894 		7, /* gfx_v9_0_emit_mem_sync */
6895 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6896 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6897 	.emit_fence = gfx_v9_0_ring_emit_fence,
6898 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6899 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6900 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6901 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6902 	.test_ring = gfx_v9_0_ring_test_ring,
6903 	.test_ib = gfx_v9_0_ring_test_ib,
6904 	.insert_nop = amdgpu_ring_insert_nop,
6905 	.pad_ib = amdgpu_ring_generic_pad_ib,
6906 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6907 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6908 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6909 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6910 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6911 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6912 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6913 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6914 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6915 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6916 };
6917 
6918 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6919 	.type = AMDGPU_RING_TYPE_COMPUTE,
6920 	.align_mask = 0xff,
6921 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6922 	.support_64bit_ptrs = true,
6923 	.vmhub = AMDGPU_GFXHUB_0,
6924 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6925 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6926 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6927 	.emit_frame_size =
6928 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6929 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6930 		5 + /* hdp invalidate */
6931 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6932 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6933 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6934 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6935 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6936 		7 + /* gfx_v9_0_emit_mem_sync */
6937 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6938 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6939 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6940 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6941 	.emit_fence = gfx_v9_0_ring_emit_fence,
6942 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6943 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6944 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6945 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6946 	.test_ring = gfx_v9_0_ring_test_ring,
6947 	.test_ib = gfx_v9_0_ring_test_ib,
6948 	.insert_nop = amdgpu_ring_insert_nop,
6949 	.pad_ib = amdgpu_ring_generic_pad_ib,
6950 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6951 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6952 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6953 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6954 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6955 };
6956 
6957 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6958 	.type = AMDGPU_RING_TYPE_KIQ,
6959 	.align_mask = 0xff,
6960 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6961 	.support_64bit_ptrs = true,
6962 	.vmhub = AMDGPU_GFXHUB_0,
6963 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6964 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6965 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6966 	.emit_frame_size =
6967 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6968 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6969 		5 + /* hdp invalidate */
6970 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6971 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6972 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6973 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6974 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6975 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6976 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6977 	.test_ring = gfx_v9_0_ring_test_ring,
6978 	.insert_nop = amdgpu_ring_insert_nop,
6979 	.pad_ib = amdgpu_ring_generic_pad_ib,
6980 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6981 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6982 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6983 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6984 };
6985 
6986 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6987 {
6988 	int i;
6989 
6990 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6991 
6992 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6993 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6994 
6995 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6996 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6997 }
6998 
6999 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7000 	.set = gfx_v9_0_set_eop_interrupt_state,
7001 	.process = gfx_v9_0_eop_irq,
7002 };
7003 
7004 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7005 	.set = gfx_v9_0_set_priv_reg_fault_state,
7006 	.process = gfx_v9_0_priv_reg_irq,
7007 };
7008 
7009 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7010 	.set = gfx_v9_0_set_priv_inst_fault_state,
7011 	.process = gfx_v9_0_priv_inst_irq,
7012 };
7013 
7014 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7015 	.set = gfx_v9_0_set_cp_ecc_error_state,
7016 	.process = amdgpu_gfx_cp_ecc_error_irq,
7017 };
7018 
7019 
7020 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7021 {
7022 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7023 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7024 
7025 	adev->gfx.priv_reg_irq.num_types = 1;
7026 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7027 
7028 	adev->gfx.priv_inst_irq.num_types = 1;
7029 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7030 
7031 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7032 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7033 }
7034 
7035 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7036 {
7037 	switch (adev->ip_versions[GC_HWIP][0]) {
7038 	case IP_VERSION(9, 0, 1):
7039 	case IP_VERSION(9, 2, 1):
7040 	case IP_VERSION(9, 4, 0):
7041 	case IP_VERSION(9, 2, 2):
7042 	case IP_VERSION(9, 1, 0):
7043 	case IP_VERSION(9, 4, 1):
7044 	case IP_VERSION(9, 3, 0):
7045 	case IP_VERSION(9, 4, 2):
7046 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7047 		break;
7048 	default:
7049 		break;
7050 	}
7051 }
7052 
7053 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7054 {
7055 	/* init ASIC gds info */
7056 	switch (adev->ip_versions[GC_HWIP][0]) {
7057 	case IP_VERSION(9, 0, 1):
7058 	case IP_VERSION(9, 2, 1):
7059 	case IP_VERSION(9, 4, 0):
7060 		adev->gds.gds_size = 0x10000;
7061 		break;
7062 	case IP_VERSION(9, 2, 2):
7063 	case IP_VERSION(9, 1, 0):
7064 	case IP_VERSION(9, 4, 1):
7065 		adev->gds.gds_size = 0x1000;
7066 		break;
7067 	case IP_VERSION(9, 4, 2):
7068 		/* Aldebaran removed all the GDS internal memory;
7069 		 * only GWS opcodes such as barrier and semaphore
7070 		 * are supported in the kernel. */
7071 		adev->gds.gds_size = 0;
7072 		break;
7073 	default:
7074 		adev->gds.gds_size = 0x10000;
7075 		break;
7076 	}
7077 
7078 	switch (adev->ip_versions[GC_HWIP][0]) {
7079 	case IP_VERSION(9, 0, 1):
7080 	case IP_VERSION(9, 4, 0):
7081 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7082 		break;
7083 	case IP_VERSION(9, 2, 1):
7084 		adev->gds.gds_compute_max_wave_id = 0x27f;
7085 		break;
7086 	case IP_VERSION(9, 2, 2):
7087 	case IP_VERSION(9, 1, 0):
7088 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7089 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7090 		else
7091 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7092 		break;
7093 	case IP_VERSION(9, 4, 1):
7094 		adev->gds.gds_compute_max_wave_id = 0xfff;
7095 		break;
7096 	case IP_VERSION(9, 4, 2):
7097 		/* deprecated for Aldebaran, no usage at all */
7098 		adev->gds.gds_compute_max_wave_id = 0;
7099 		break;
7100 	default:
7101 		/* this really depends on the chip */
7102 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7103 		break;
7104 	}
7105 
7106 	adev->gds.gws_size = 64;
7107 	adev->gds.oa_size = 16;
7108 }
7109 
7110 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7111 						 u32 bitmap)
7112 {
7113 	u32 data;
7114 
7115 	if (!bitmap)
7116 		return;
7117 
7118 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7119 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7120 
7121 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7122 }
7123 
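/*
 * Return the bitmap of active CUs for the currently selected SE/SH: the
 * hardware and user inactive-CU masks are OR'ed together and inverted.
 */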
7124 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7125 {
7126 	u32 data, mask;
7127 
7128 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7129 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7130 
7131 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7132 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7133 
7134 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7135 
7136 	return (~data) & mask;
7137 }
7138 
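/*
 * Fill @cu_info by walking every SE/SH: apply the user CU disable masks,
 * record the per-SH active CU bitmaps, build the always-on CU mask and
 * count the total number of active CUs.
 */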
7139 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7140 				 struct amdgpu_cu_info *cu_info)
7141 {
7142 	int i, j, k, counter, active_cu_number = 0;
7143 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7144 	unsigned disable_masks[4 * 4];
7145 
7146 	if (!adev || !cu_info)
7147 		return -EINVAL;
7148 
7149 	/*
7150 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7151 	 */
7152 	if (adev->gfx.config.max_shader_engines *
7153 		adev->gfx.config.max_sh_per_se > 16)
7154 		return -EINVAL;
7155 
7156 	amdgpu_gfx_parse_disable_cu(disable_masks,
7157 				    adev->gfx.config.max_shader_engines,
7158 				    adev->gfx.config.max_sh_per_se);
7159 
7160 	mutex_lock(&adev->grbm_idx_mutex);
7161 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7162 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7163 			mask = 1;
7164 			ao_bitmap = 0;
7165 			counter = 0;
7166 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7167 			gfx_v9_0_set_user_cu_inactive_bitmap(
7168 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7169 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7170 
7171 			/*
7172 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
7173 			 * is a 4x4 array, which suits Vega ASICs with their
7174 			 * 4*2 SE/SH layout.
7175 			 * But for Arcturus, the SE/SH layout is changed to 8*1.
7176 			 * To minimize the impact, we make it compatible with
7177 			 * the current bitmap array as below:
7178 			 *    SE4,SH0 --> bitmap[0][1]
7179 			 *    SE5,SH0 --> bitmap[1][1]
7180 			 *    SE6,SH0 --> bitmap[2][1]
7181 			 *    SE7,SH0 --> bitmap[3][1]
7182 			 */
7183 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7184 
7185 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7186 				if (bitmap & mask) {
7187 					if (counter < adev->gfx.config.max_cu_per_sh)
7188 						ao_bitmap |= mask;
7189 					counter++;
7190 				}
7191 				mask <<= 1;
7192 			}
7193 			active_cu_number += counter;
7194 			if (i < 2 && j < 2)
7195 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7196 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7197 		}
7198 	}
7199 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7200 	mutex_unlock(&adev->grbm_idx_mutex);
7201 
7202 	cu_info->number = active_cu_number;
7203 	cu_info->ao_cu_mask = ao_cu_mask;
7204 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7205 
7206 	return 0;
7207 }
7208 
7209 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7210 {
7211 	.type = AMD_IP_BLOCK_TYPE_GFX,
7212 	.major = 9,
7213 	.minor = 0,
7214 	.rev = 0,
7215 	.funcs = &gfx_v9_0_ip_funcs,
7216 };
7217