xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision ee8ec048)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 enum ta_ras_gfx_subblock {
144 	/*CPC*/
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
146 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
147 	TA_RAS_BLOCK__GFX_CPC_UCODE,
148 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
149 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
150 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
151 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
152 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
153 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
155 	/* CPF*/
156 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
157 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
158 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
159 	TA_RAS_BLOCK__GFX_CPF_TAG,
160 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
161 	/* CPG*/
162 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
164 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
165 	TA_RAS_BLOCK__GFX_CPG_TAG,
166 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
167 	/* GDS*/
168 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
169 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
170 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
171 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
172 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
173 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
174 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
175 	/* SPI*/
176 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
177 	/* SQ*/
178 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
180 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
181 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
182 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
184 	/* SQC (3 ranges)*/
185 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
186 	/* SQC range 0*/
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
189 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
190 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
191 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
192 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
193 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
194 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
197 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
198 	/* SQC range 1*/
199 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
200 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
201 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
202 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
203 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
204 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
205 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
206 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
207 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
209 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
211 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
212 	/* SQC range 2*/
213 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
214 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
215 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
216 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
218 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
220 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
221 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
223 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
225 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
227 	/* TA*/
228 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
231 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
232 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
233 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
234 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
235 	/* TCA*/
236 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
239 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
240 	/* TCC (5 sub-ranges)*/
241 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
242 	/* TCC range 0*/
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
244 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
245 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
246 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
247 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
248 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
250 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
251 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
253 	/* TCC range 1*/
254 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
255 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
256 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
258 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
259 	/* TCC range 2*/
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
261 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
262 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
263 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
264 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
265 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
266 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
267 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
268 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
270 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
271 	/* TCC range 3*/
272 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
273 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
274 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
276 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
277 	/* TCC range 4*/
278 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
279 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
280 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
281 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
282 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
283 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
285 	/* TCI*/
286 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
287 	/* TCP*/
288 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
289 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
291 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
292 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
293 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
294 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
295 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
296 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
297 	/* TD*/
298 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
300 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
301 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
302 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
303 	/* EA (3 sub-ranges)*/
304 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
305 	/* EA range 0*/
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
307 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
308 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
309 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
310 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
311 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
312 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
316 	/* EA range 1*/
317 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
318 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
319 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
320 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
324 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
326 	/* EA range 2*/
327 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
328 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
329 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
330 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
331 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
333 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
334 	/* UTC VM L2 bank*/
335 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
336 	/* UTC VM walker*/
337 	TA_RAS_BLOCK__UTC_VML2_WALKER,
338 	/* UTC ATC L2 2MB cache*/
339 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
340 	/* UTC ATC L2 4KB cache*/
341 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
342 	TA_RAS_BLOCK__GFX_MAX
343 };
344 
345 struct ras_gfx_subblock {
346 	unsigned char *name;
347 	int ta_subblock;
348 	int hw_supported_error_type;
349 	int sw_supported_error_type;
350 };
351 
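/*
 * Each entry in the table below packs eight capability flags per sub-block:
 * a-d land in hw_supported_error_type (bits 0-3) and g, e, h, f in
 * sw_supported_error_type (bits 0, 1, 2 and 3), matching the shifts in the
 * macro.
 */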
352 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
353 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
354 		#subblock,                                                     \
355 		TA_RAS_BLOCK__##subblock,                                      \
356 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
357 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
358 	}
359 
360 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
378 			     0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
380 			     0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
389 			     0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391 			     0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
393 			     0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
395 			     0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
397 			     0, 0),
398 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
399 			     0),
400 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
401 			     1),
402 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
403 			     0, 0, 0),
404 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405 			     0),
406 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
407 			     0),
408 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
409 			     0),
410 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
411 			     0),
412 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
413 			     0),
414 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
415 			     0, 0),
416 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417 			     0),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
419 			     0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
421 			     0, 0, 0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 			     0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
425 			     0),
426 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
427 			     0),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
429 			     0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
431 			     0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
433 			     0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
435 			     0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
445 			     1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
447 			     1),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
449 			     1),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
451 			     0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
453 			     0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
466 			     0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
469 			     0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
471 			     0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
473 			     0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
508 };
509 
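/*
 * "Golden" register settings: (register, and-mask, or-value) entries applied
 * through soc15_program_register_sequence() from
 * gfx_v9_0_init_golden_registers() to override the hardware reset defaults.
 */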
510 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
511 {
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
532 };
533 
534 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
535 {
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
554 };
555 
556 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
557 {
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
569 };
570 
571 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
572 {
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
597 };
598 
599 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
600 {
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
608 };
609 
610 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
611 {
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
631 };
632 
633 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
634 {
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
654 };
655 
656 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
657 {
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
674 };
675 
676 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
677 {
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
691 };
692 
693 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
694 {
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
706 };
707 
708 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
709 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
710 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
711 };
712 
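/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register instances relative to
 * instance 0 (so the first entry is 0).
 */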
713 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
714 {
715 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
716 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
717 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
718 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
719 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
720 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 };
724 
725 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
726 {
727 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
728 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
729 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
730 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
731 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
732 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 };
736 
737 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
738 {
739 	static void *scratch_reg0;
740 	static void *scratch_reg1;
741 	static void *scratch_reg2;
742 	static void *scratch_reg3;
743 	static void *spare_int;
744 	static uint32_t grbm_cntl;
745 	static uint32_t grbm_idx;
746 
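	/*
	 * The SCRATCH_REG* registers sit in the same register segment, so
	 * SCRATCH_REG1's base index is reused for scratch_reg2/3 below.
	 */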
747 	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
748 	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
749 	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
750 	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
751 	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
752 
753 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
754 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
755 
756 	if (amdgpu_sriov_runtime(adev)) {
757 		pr_err("RLCG register write should not be used during SR-IOV runtime\n");
758 		return;
759 	}
760 
761 	if (offset == grbm_cntl || offset == grbm_idx) {
762 		if (offset == grbm_cntl)
763 			writel(v, scratch_reg2);
764 		else if (offset == grbm_idx)
765 			writel(v, scratch_reg3);
766 
767 		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
768 	} else {
769 		uint32_t i = 0;
770 		uint32_t retries = 50000;
771 
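		/*
		 * Indirect write through the RLC: the value goes to scratch_reg0,
		 * the register offset (with bit 31 set as a "pending" flag) to
		 * scratch_reg1, and the spare interrupt kicks the RLC firmware.
		 * The RLC clears bit 31 once it has performed the write, which
		 * the loop below polls for.
		 */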
772 		writel(v, scratch_reg0);
773 		writel(offset | 0x80000000, scratch_reg1);
774 		writel(1, spare_int);
775 		for (i = 0; i < retries; i++) {
776 			u32 tmp;
777 
778 			tmp = readl(scratch_reg1);
779 			if (!(tmp & 0x80000000))
780 				break;
781 
782 			udelay(10);
783 		}
784 		if (i >= retries)
785 			pr_err("timeout: rlcg program of reg 0x%05x failed!\n", offset);
786 	}
787 
788 }
789 
790 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
791 			       u32 v, u32 acc_flags, u32 hwip)
792 {
793 	if ((acc_flags & AMDGPU_REGS_RLC) &&
794 	    amdgpu_sriov_fullaccess(adev)) {
795 		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
796 
797 		return;
798 	}
799 
800 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
801 		WREG32_NO_KIQ(offset, v);
802 	else
803 		WREG32(offset, v);
804 }
805 
806 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
807 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
808 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
809 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
810 
811 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
812 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
813 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
814 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
815 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
816 				struct amdgpu_cu_info *cu_info);
817 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
818 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
819 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
820 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
821 					  void *ras_error_status);
822 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
823 				     void *inject_if);
824 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
825 
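/*
 * KIQ (Kernel Interface Queue) helpers: these build the PM4 packets the driver
 * submits on the KIQ ring to hand resources to the CP and to map, unmap and
 * query the other gfx/compute queues.
 */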
826 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
827 				uint64_t queue_mask)
828 {
829 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
830 	amdgpu_ring_write(kiq_ring,
831 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
832 		/* vmid_mask:0, queue_type:0 (KIQ) */
833 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
834 	amdgpu_ring_write(kiq_ring,
835 			lower_32_bits(queue_mask));	/* queue mask lo */
836 	amdgpu_ring_write(kiq_ring,
837 			upper_32_bits(queue_mask));	/* queue mask hi */
838 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
839 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
840 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
841 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
842 }
843 
844 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
845 				 struct amdgpu_ring *ring)
846 {
847 	struct amdgpu_device *adev = kiq_ring->adev;
848 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
849 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
850 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
851 
852 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
853 	/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
854 	amdgpu_ring_write(kiq_ring,
855 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
856 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
857 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
858 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
859 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
860 			 /*queue_type: normal compute queue */
861 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
862 			 /* alloc format: all_on_one_pipe */
863 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
864 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
865 			 /* num_queues: must be 1 */
866 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
867 	amdgpu_ring_write(kiq_ring,
868 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
869 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
870 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
871 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
872 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
873 }
874 
875 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
876 				   struct amdgpu_ring *ring,
877 				   enum amdgpu_unmap_queues_action action,
878 				   u64 gpu_addr, u64 seq)
879 {
880 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
881 
882 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
883 	amdgpu_ring_write(kiq_ring, /* action, queue_sel:0, eng_sel, num_queues:1 */
884 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
885 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
886 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
887 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
888 	amdgpu_ring_write(kiq_ring,
889 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
890 
891 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
892 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
893 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
894 		amdgpu_ring_write(kiq_ring, seq);
895 	} else {
896 		amdgpu_ring_write(kiq_ring, 0);
897 		amdgpu_ring_write(kiq_ring, 0);
898 		amdgpu_ring_write(kiq_ring, 0);
899 	}
900 }
901 
902 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
903 				   struct amdgpu_ring *ring,
904 				   u64 addr,
905 				   u64 seq)
906 {
907 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
910 	amdgpu_ring_write(kiq_ring,
911 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
912 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
913 			  PACKET3_QUERY_STATUS_COMMAND(2));
914 	/* doorbell_offset, eng_sel */
915 	amdgpu_ring_write(kiq_ring,
916 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
917 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
920 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
921 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
922 }
923 
924 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
925 				uint16_t pasid, uint32_t flush_type,
926 				bool all_hub)
927 {
928 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
929 	amdgpu_ring_write(kiq_ring,
930 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
931 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
932 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
933 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
934 }
935 
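/*
 * The *_size fields below are the packet lengths, in dwords, emitted by the
 * helpers above; the KIQ code uses them to reserve ring space.
 */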
936 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
937 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
938 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
939 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
940 	.kiq_query_status = gfx_v9_0_kiq_query_status,
941 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
942 	.set_resources_size = 8,
943 	.map_queues_size = 7,
944 	.unmap_queues_size = 6,
945 	.query_status_size = 7,
946 	.invalidate_tlbs_size = 2,
947 };
948 
949 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
950 {
951 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
952 }
953 
954 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
955 {
956 	switch (adev->ip_versions[GC_HWIP][0]) {
957 	case IP_VERSION(9, 0, 1):
958 		soc15_program_register_sequence(adev,
959 						golden_settings_gc_9_0,
960 						ARRAY_SIZE(golden_settings_gc_9_0));
961 		soc15_program_register_sequence(adev,
962 						golden_settings_gc_9_0_vg10,
963 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
964 		break;
965 	case IP_VERSION(9, 2, 1):
966 		soc15_program_register_sequence(adev,
967 						golden_settings_gc_9_2_1,
968 						ARRAY_SIZE(golden_settings_gc_9_2_1));
969 		soc15_program_register_sequence(adev,
970 						golden_settings_gc_9_2_1_vg12,
971 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
972 		break;
973 	case IP_VERSION(9, 4, 0):
974 		soc15_program_register_sequence(adev,
975 						golden_settings_gc_9_0,
976 						ARRAY_SIZE(golden_settings_gc_9_0));
977 		soc15_program_register_sequence(adev,
978 						golden_settings_gc_9_0_vg20,
979 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
980 		break;
981 	case IP_VERSION(9, 4, 1):
982 		soc15_program_register_sequence(adev,
983 						golden_settings_gc_9_4_1_arct,
984 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
985 		break;
986 	case IP_VERSION(9, 2, 2):
987 	case IP_VERSION(9, 1, 0):
988 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
989 						ARRAY_SIZE(golden_settings_gc_9_1));
990 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
991 			soc15_program_register_sequence(adev,
992 							golden_settings_gc_9_1_rv2,
993 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
994 		else
995 			soc15_program_register_sequence(adev,
996 							golden_settings_gc_9_1_rv1,
997 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
998 		break;
999 	case IP_VERSION(9, 3, 0):
1000 		soc15_program_register_sequence(adev,
1001 						golden_settings_gc_9_1_rn,
1002 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1003 		return; /* Renoir does not need the common golden settings */
1004 	case IP_VERSION(9, 4, 2):
1005 		gfx_v9_4_2_init_golden_registers(adev,
1006 						 adev->smuio.funcs->get_die_id(adev));
1007 		break;
1008 	default:
1009 		break;
1010 	}
1011 
1012 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1013 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
1014 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1015 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1016 }
1017 
1018 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1019 {
1020 	adev->gfx.scratch.num_reg = 8;
1021 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1022 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1023 }
1024 
1025 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1026 				       bool wc, uint32_t reg, uint32_t val)
1027 {
1028 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1029 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1030 				WRITE_DATA_DST_SEL(0) |
1031 				(wc ? WR_CONFIRM : 0));
1032 	amdgpu_ring_write(ring, reg);
1033 	amdgpu_ring_write(ring, 0);	/* upper 32 bits of the destination; 0 for register writes */
1034 	amdgpu_ring_write(ring, val);
1035 }
1036 
1037 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1038 				  int mem_space, int opt, uint32_t addr0,
1039 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1040 				  uint32_t inv)
1041 {
1042 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1043 	amdgpu_ring_write(ring,
1044 				 /* memory (1) or register (0) */
1045 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1046 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1047 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1048 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1049 
1050 	if (mem_space)
1051 		BUG_ON(addr0 & 0x3); /* Dword align */
1052 	amdgpu_ring_write(ring, addr0);
1053 	amdgpu_ring_write(ring, addr1);
1054 	amdgpu_ring_write(ring, ref);
1055 	amdgpu_ring_write(ring, mask);
1056 	amdgpu_ring_write(ring, inv); /* poll interval */
1057 }
1058 
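/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the ring and
 * poll the register until the value shows up or the timeout expires.
 */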
1059 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1060 {
1061 	struct amdgpu_device *adev = ring->adev;
1062 	uint32_t scratch;
1063 	uint32_t tmp = 0;
1064 	unsigned i;
1065 	int r;
1066 
1067 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1068 	if (r)
1069 		return r;
1070 
1071 	WREG32(scratch, 0xCAFEDEAD);
1072 	r = amdgpu_ring_alloc(ring, 3);
1073 	if (r)
1074 		goto error_free_scratch;
1075 
1076 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1077 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1078 	amdgpu_ring_write(ring, 0xDEADBEEF);
1079 	amdgpu_ring_commit(ring);
1080 
1081 	for (i = 0; i < adev->usec_timeout; i++) {
1082 		tmp = RREG32(scratch);
1083 		if (tmp == 0xDEADBEEF)
1084 			break;
1085 		udelay(1);
1086 	}
1087 
1088 	if (i >= adev->usec_timeout)
1089 		r = -ETIMEDOUT;
1090 
1091 error_free_scratch:
1092 	amdgpu_gfx_scratch_free(adev, scratch);
1093 	return r;
1094 }
1095 
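/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, then wait on its fence and check that the value landed.
 */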
1096 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1097 {
1098 	struct amdgpu_device *adev = ring->adev;
1099 	struct amdgpu_ib ib;
1100 	struct dma_fence *f = NULL;
1101 
1102 	unsigned index;
1103 	uint64_t gpu_addr;
1104 	uint32_t tmp;
1105 	long r;
1106 
1107 	r = amdgpu_device_wb_get(adev, &index);
1108 	if (r)
1109 		return r;
1110 
1111 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1112 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1113 	memset(&ib, 0, sizeof(ib));
1114 	r = amdgpu_ib_get(adev, NULL, 16,
1115 					AMDGPU_IB_POOL_DIRECT, &ib);
1116 	if (r)
1117 		goto err1;
1118 
1119 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1120 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1121 	ib.ptr[2] = lower_32_bits(gpu_addr);
1122 	ib.ptr[3] = upper_32_bits(gpu_addr);
1123 	ib.ptr[4] = 0xDEADBEEF;
1124 	ib.length_dw = 5;
1125 
1126 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1127 	if (r)
1128 		goto err2;
1129 
1130 	r = dma_fence_wait_timeout(f, false, timeout);
1131 	if (r == 0) {
1132 		r = -ETIMEDOUT;
1133 		goto err2;
1134 	} else if (r < 0) {
1135 		goto err2;
1136 	}
1137 
1138 	tmp = adev->wb.wb[index];
1139 	if (tmp == 0xDEADBEEF)
1140 		r = 0;
1141 	else
1142 		r = -EINVAL;
1143 
1144 err2:
1145 	amdgpu_ib_free(adev, &ib, NULL);
1146 	dma_fence_put(f);
1147 err1:
1148 	amdgpu_device_wb_free(adev, index);
1149 	return r;
1150 }
1151 
1152 
1153 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1154 {
1155 	release_firmware(adev->gfx.pfp_fw);
1156 	adev->gfx.pfp_fw = NULL;
1157 	release_firmware(adev->gfx.me_fw);
1158 	adev->gfx.me_fw = NULL;
1159 	release_firmware(adev->gfx.ce_fw);
1160 	adev->gfx.ce_fw = NULL;
1161 	release_firmware(adev->gfx.rlc_fw);
1162 	adev->gfx.rlc_fw = NULL;
1163 	release_firmware(adev->gfx.mec_fw);
1164 	adev->gfx.mec_fw = NULL;
1165 	release_firmware(adev->gfx.mec2_fw);
1166 	adev->gfx.mec2_fw = NULL;
1167 
1168 	kfree(adev->gfx.rlc.register_list_format);
1169 }
1170 
1171 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1172 {
1173 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1174 
1175 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1176 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1177 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1178 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1179 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1180 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1181 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1182 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1183 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1184 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1185 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1186 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1187 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1188 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1189 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1190 }
1191 
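/*
 * Decide, from the loaded CP firmware versions, whether the ME/MEC microcode
 * supports the combined register write-then-wait handling used elsewhere in
 * this file, and warn once if the firmware is too old.
 */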
1192 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1193 {
1194 	adev->gfx.me_fw_write_wait = false;
1195 	adev->gfx.mec_fw_write_wait = false;
1196 
1197 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1198 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1199 	    (adev->gfx.mec_feature_version < 46) ||
1200 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1201 	    (adev->gfx.pfp_feature_version < 46)))
1202 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1203 
1204 	switch (adev->ip_versions[GC_HWIP][0]) {
1205 	case IP_VERSION(9, 0, 1):
1206 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1207 		    (adev->gfx.me_feature_version >= 42) &&
1208 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1209 		    (adev->gfx.pfp_feature_version >= 42))
1210 			adev->gfx.me_fw_write_wait = true;
1211 
1212 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1213 		    (adev->gfx.mec_feature_version >= 42))
1214 			adev->gfx.mec_fw_write_wait = true;
1215 		break;
1216 	case IP_VERSION(9, 2, 1):
1217 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1218 		    (adev->gfx.me_feature_version >= 44) &&
1219 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1220 		    (adev->gfx.pfp_feature_version >= 44))
1221 			adev->gfx.me_fw_write_wait = true;
1222 
1223 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1224 		    (adev->gfx.mec_feature_version >= 44))
1225 			adev->gfx.mec_fw_write_wait = true;
1226 		break;
1227 	case IP_VERSION(9, 4, 0):
1228 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1229 		    (adev->gfx.me_feature_version >= 44) &&
1230 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1231 		    (adev->gfx.pfp_feature_version >= 44))
1232 			adev->gfx.me_fw_write_wait = true;
1233 
1234 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1235 		    (adev->gfx.mec_feature_version >= 44))
1236 			adev->gfx.mec_fw_write_wait = true;
1237 		break;
1238 	case IP_VERSION(9, 1, 0):
1239 	case IP_VERSION(9, 2, 2):
1240 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1241 		    (adev->gfx.me_feature_version >= 42) &&
1242 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1243 		    (adev->gfx.pfp_feature_version >= 42))
1244 			adev->gfx.me_fw_write_wait = true;
1245 
1246 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1247 		    (adev->gfx.mec_feature_version >= 42))
1248 			adev->gfx.mec_fw_write_wait = true;
1249 		break;
1250 	default:
1251 		adev->gfx.me_fw_write_wait = true;
1252 		adev->gfx.mec_fw_write_wait = true;
1253 		break;
1254 	}
1255 }
1256 
1257 struct amdgpu_gfxoff_quirk {
1258 	u16 chip_vendor;
1259 	u16 chip_device;
1260 	u16 subsys_vendor;
1261 	u16 subsys_device;
1262 	u8 revision;
1263 };
1264 
1265 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1266 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1267 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1268 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1269 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1270 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1271 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1272 	{ 0, 0, 0, 0, 0 },
1273 };
1274 
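/* Return true when the PCI IDs/revision match an entry in the quirk list above. */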
1275 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1276 {
1277 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1278 
1279 	while (p && p->chip_device != 0) {
1280 		if (pdev->vendor == p->chip_vendor &&
1281 		    pdev->device == p->chip_device &&
1282 		    pdev->subsystem_vendor == p->subsys_vendor &&
1283 		    pdev->subsystem_device == p->subsys_device &&
1284 		    pdev->revision == p->revision) {
1285 			return true;
1286 		}
1287 		++p;
1288 	}
1289 	return false;
1290 }
1291 
1292 static bool is_raven_kicker(struct amdgpu_device *adev)
1293 {
1294 	if (adev->pm.fw_version >= 0x41e2b)
1295 		return true;
1296 	else
1297 		return false;
1298 }
1299 
1300 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1301 {
1302 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1303 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1304 	    (adev->gfx.me_feature_version >= 52))
1305 		return true;
1306 	else
1307 		return false;
1308 }
1309 
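/*
 * Disable GFXOFF on quirked boards or when the RLC firmware is too old,
 * and enable the matching GFX powergating flags where GFXOFF stays on.
 */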
1310 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1311 {
1312 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1313 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1314 
1315 	switch (adev->ip_versions[GC_HWIP][0]) {
1316 	case IP_VERSION(9, 0, 1):
1317 	case IP_VERSION(9, 2, 1):
1318 	case IP_VERSION(9, 4, 0):
1319 		break;
1320 	case IP_VERSION(9, 2, 2):
1321 	case IP_VERSION(9, 1, 0):
1322 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1323 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1324 		    ((!is_raven_kicker(adev) &&
1325 		      adev->gfx.rlc_fw_version < 531) ||
1326 		     (adev->gfx.rlc_feature_version < 1) ||
1327 		     !adev->gfx.rlc.is_rlc_v2_1))
1328 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1329 
1330 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1331 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1332 				AMD_PG_SUPPORT_CP |
1333 				AMD_PG_SUPPORT_RLC_SMU_HS;
1334 		break;
1335 	case IP_VERSION(9, 3, 0):
1336 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1337 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1338 				AMD_PG_SUPPORT_CP |
1339 				AMD_PG_SUPPORT_RLC_SMU_HS;
1340 		break;
1341 	default:
1342 		break;
1343 	}
1344 }
1345 
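/*
 * Request and validate the PFP, ME and CE firmware for @chip_name, record
 * their ucode/feature versions and, for PSP-based loading, add them to the
 * firmware list so their size is accounted for.
 */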
1346 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1347 					  const char *chip_name)
1348 {
1349 	char fw_name[30];
1350 	int err;
1351 	struct amdgpu_firmware_info *info = NULL;
1352 	const struct common_firmware_header *header = NULL;
1353 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1354 
1355 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1356 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1357 	if (err)
1358 		goto out;
1359 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1360 	if (err)
1361 		goto out;
1362 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1363 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1364 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1365 
1366 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1367 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1368 	if (err)
1369 		goto out;
1370 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1371 	if (err)
1372 		goto out;
1373 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1374 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1375 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1376 
1377 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1378 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1379 	if (err)
1380 		goto out;
1381 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1382 	if (err)
1383 		goto out;
1384 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1385 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1386 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1387 
1388 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1389 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1390 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1391 		info->fw = adev->gfx.pfp_fw;
1392 		header = (const struct common_firmware_header *)info->fw->data;
1393 		adev->firmware.fw_size +=
1394 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1395 
1396 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1397 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1398 		info->fw = adev->gfx.me_fw;
1399 		header = (const struct common_firmware_header *)info->fw->data;
1400 		adev->firmware.fw_size +=
1401 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1402 
1403 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1404 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1405 		info->fw = adev->gfx.ce_fw;
1406 		header = (const struct common_firmware_header *)info->fw->data;
1407 		adev->firmware.fw_size +=
1408 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1409 	}
1410 
1411 out:
1412 	if (err) {
1413 		dev_err(adev->dev,
1414 			"gfx9: Failed to load firmware \"%s\"\n",
1415 			fw_name);
1416 		release_firmware(adev->gfx.pfp_fw);
1417 		adev->gfx.pfp_fw = NULL;
1418 		release_firmware(adev->gfx.me_fw);
1419 		adev->gfx.me_fw = NULL;
1420 		release_firmware(adev->gfx.ce_fw);
1421 		adev->gfx.ce_fw = NULL;
1422 	}
1423 	return err;
1424 }
1425 
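/*
 * Request the RLC firmware (selecting the AM4 or "kicker" variant where
 * needed), copy the register list format/restore tables out of the header
 * and register the RLC images with the PSP firmware list.
 */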
1426 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1427 					  const char *chip_name)
1428 {
1429 	char fw_name[30];
1430 	int err;
1431 	struct amdgpu_firmware_info *info = NULL;
1432 	const struct common_firmware_header *header = NULL;
1433 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1434 	unsigned int *tmp = NULL;
1435 	unsigned int i = 0;
1436 	uint16_t version_major;
1437 	uint16_t version_minor;
1438 	uint32_t smu_version;
1439 
1440 	/*
1441 	 * For Picasso boards on an AM4 socket, use picasso_rlc_am4.bin
1442 	 * instead of picasso_rlc.bin.
1443 	 * How to tell them apart:
1444 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1445 	 *          or revision >= 0xD8 && revision <= 0xDF
1446 	 * otherwise it is PCO FP5
1447 	 */
1448 	if (!strcmp(chip_name, "picasso") &&
1449 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1450 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1451 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1452 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1453 		(smu_version >= 0x41e2b))
1454 		/*
1455 		 * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1456 		 */
1457 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1458 	else
1459 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1460 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1461 	if (err)
1462 		goto out;
1463 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1464 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1465 
1466 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1467 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1468 	if (version_major == 2 && version_minor == 1)
1469 		adev->gfx.rlc.is_rlc_v2_1 = true;
1470 
1471 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1472 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1473 	adev->gfx.rlc.save_and_restore_offset =
1474 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1475 	adev->gfx.rlc.clear_state_descriptor_offset =
1476 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1477 	adev->gfx.rlc.avail_scratch_ram_locations =
1478 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1479 	adev->gfx.rlc.reg_restore_list_size =
1480 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1481 	adev->gfx.rlc.reg_list_format_start =
1482 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1483 	adev->gfx.rlc.reg_list_format_separate_start =
1484 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1485 	adev->gfx.rlc.starting_offsets_start =
1486 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1487 	adev->gfx.rlc.reg_list_format_size_bytes =
1488 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1489 	adev->gfx.rlc.reg_list_size_bytes =
1490 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1491 	adev->gfx.rlc.register_list_format =
1492 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1493 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1494 	if (!adev->gfx.rlc.register_list_format) {
1495 		err = -ENOMEM;
1496 		goto out;
1497 	}
1498 
1499 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1500 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1501 	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1502 		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1503 
1504 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1505 
1506 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1507 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1508 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1509 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1510 
1511 	if (adev->gfx.rlc.is_rlc_v2_1)
1512 		gfx_v9_0_init_rlc_ext_microcode(adev);
1513 
1514 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1515 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1516 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1517 		info->fw = adev->gfx.rlc_fw;
1518 		header = (const struct common_firmware_header *)info->fw->data;
1519 		adev->firmware.fw_size +=
1520 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1521 
1522 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1523 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1524 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1525 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1526 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1527 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1528 			info->fw = adev->gfx.rlc_fw;
1529 			adev->firmware.fw_size +=
1530 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1531 
1532 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1533 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1534 			info->fw = adev->gfx.rlc_fw;
1535 			adev->firmware.fw_size +=
1536 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1537 
1538 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1539 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1540 			info->fw = adev->gfx.rlc_fw;
1541 			adev->firmware.fw_size +=
1542 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1543 		}
1544 	}
1545 
1546 out:
1547 	if (err) {
1548 		dev_err(adev->dev,
1549 			"gfx9: Failed to load firmware \"%s\"\n",
1550 			fw_name);
1551 		release_firmware(adev->gfx.rlc_fw);
1552 		adev->gfx.rlc_fw = NULL;
1553 	}
1554 	return err;
1555 }
1556 
1557 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1558 {
1559 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1560 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1561 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1562 		return false;
1563 
1564 	return true;
1565 }
1566 
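/*
 * Request and validate the MEC firmware (and MEC2 where a separate binary
 * is supported), then register the ucode and jump tables for PSP loading.
 */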
1567 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1568 					  const char *chip_name)
1569 {
1570 	char fw_name[30];
1571 	int err;
1572 	struct amdgpu_firmware_info *info = NULL;
1573 	const struct common_firmware_header *header = NULL;
1574 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1575 
1576 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1577 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1578 	if (err)
1579 		goto out;
1580 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1581 	if (err)
1582 		goto out;
1583 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1584 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1585 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1586 
1587 
1588 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1589 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1590 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1591 		if (!err) {
1592 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1593 			if (err)
1594 				goto out;
1595 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1596 				adev->gfx.mec2_fw->data;
1597 			adev->gfx.mec2_fw_version =
1598 				le32_to_cpu(cp_hdr->header.ucode_version);
1599 			adev->gfx.mec2_feature_version =
1600 				le32_to_cpu(cp_hdr->ucode_feature_version);
1601 		} else {
1602 			err = 0;
1603 			adev->gfx.mec2_fw = NULL;
1604 		}
1605 	} else {
1606 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1607 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1608 	}
1609 
1610 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1611 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1612 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1613 		info->fw = adev->gfx.mec_fw;
1614 		header = (const struct common_firmware_header *)info->fw->data;
1615 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1616 		adev->firmware.fw_size +=
1617 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1618 
1619 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1620 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1621 		info->fw = adev->gfx.mec_fw;
1622 		adev->firmware.fw_size +=
1623 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1624 
1625 		if (adev->gfx.mec2_fw) {
1626 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1627 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1628 			info->fw = adev->gfx.mec2_fw;
1629 			header = (const struct common_firmware_header *)info->fw->data;
1630 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1631 			adev->firmware.fw_size +=
1632 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1633 
1634 			/* TODO: Determine if MEC2 JT FW loading can be removed
1635 			 * for all GFX v9 ASICs and above */
1636 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1637 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1638 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1639 				info->fw = adev->gfx.mec2_fw;
1640 				adev->firmware.fw_size +=
1641 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1642 					PAGE_SIZE);
1643 			}
1644 		}
1645 	}
1646 
1647 out:
1648 	gfx_v9_0_check_if_need_gfxoff(adev);
1649 	gfx_v9_0_check_fw_write_wait(adev);
1650 	if (err) {
1651 		dev_err(adev->dev,
1652 			"gfx9: Failed to load firmware \"%s\"\n",
1653 			fw_name);
1654 		release_firmware(adev->gfx.mec_fw);
1655 		adev->gfx.mec_fw = NULL;
1656 		release_firmware(adev->gfx.mec2_fw);
1657 		adev->gfx.mec2_fw = NULL;
1658 	}
1659 	return err;
1660 }
1661 
1662 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1663 {
1664 	const char *chip_name;
1665 	int r;
1666 
1667 	DRM_DEBUG("\n");
1668 
1669 	switch (adev->ip_versions[GC_HWIP][0]) {
1670 	case IP_VERSION(9, 0, 1):
1671 		chip_name = "vega10";
1672 		break;
1673 	case IP_VERSION(9, 2, 1):
1674 		chip_name = "vega12";
1675 		break;
1676 	case IP_VERSION(9, 4, 0):
1677 		chip_name = "vega20";
1678 		break;
1679 	case IP_VERSION(9, 2, 2):
1680 	case IP_VERSION(9, 1, 0):
1681 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1682 			chip_name = "raven2";
1683 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1684 			chip_name = "picasso";
1685 		else
1686 			chip_name = "raven";
1687 		break;
1688 	case IP_VERSION(9, 4, 1):
1689 		chip_name = "arcturus";
1690 		break;
1691 	case IP_VERSION(9, 3, 0):
1692 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1693 			chip_name = "renoir";
1694 		else
1695 			chip_name = "green_sardine";
1696 		break;
1697 	case IP_VERSION(9, 4, 2):
1698 		chip_name = "aldebaran";
1699 		break;
1700 	default:
1701 		BUG();
1702 	}
1703 
1704 	/* No CPG in Arcturus */
1705 	if (adev->gfx.num_gfx_rings) {
1706 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1707 		if (r)
1708 			return r;
1709 	}
1710 
1711 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1712 	if (r)
1713 		return r;
1714 
1715 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1716 	if (r)
1717 		return r;
1718 
1719 	return r;
1720 }
1721 
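/* Return the size of the clear state buffer, in dwords. */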
1722 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1723 {
1724 	u32 count = 0;
1725 	const struct cs_section_def *sect = NULL;
1726 	const struct cs_extent_def *ext = NULL;
1727 
1728 	/* begin clear state */
1729 	count += 2;
1730 	/* context control state */
1731 	count += 3;
1732 
1733 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1734 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1735 			if (sect->id == SECT_CONTEXT)
1736 				count += 2 + ext->reg_count;
1737 			else
1738 				return 0;
1739 		}
1740 	}
1741 
1742 	/* end clear state */
1743 	count += 2;
1744 	/* clear state */
1745 	count += 2;
1746 
1747 	return count;
1748 }
1749 
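/*
 * Build the clear state buffer as PM4 packets: preamble, context control,
 * one SET_CONTEXT_REG per extent, end-of-clear-state and CLEAR_STATE.
 */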
1750 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1751 				    volatile u32 *buffer)
1752 {
1753 	u32 count = 0, i;
1754 	const struct cs_section_def *sect = NULL;
1755 	const struct cs_extent_def *ext = NULL;
1756 
1757 	if (adev->gfx.rlc.cs_data == NULL)
1758 		return;
1759 	if (buffer == NULL)
1760 		return;
1761 
1762 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1763 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1764 
1765 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1766 	buffer[count++] = cpu_to_le32(0x80000000);
1767 	buffer[count++] = cpu_to_le32(0x80000000);
1768 
1769 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1770 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1771 			if (sect->id == SECT_CONTEXT) {
1772 				buffer[count++] =
1773 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1774 				buffer[count++] = cpu_to_le32(ext->reg_index -
1775 						PACKET3_SET_CONTEXT_REG_START);
1776 				for (i = 0; i < ext->reg_count; i++)
1777 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1778 			} else {
1779 				return;
1780 			}
1781 		}
1782 	}
1783 
1784 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1785 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1786 
1787 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1788 	buffer[count++] = cpu_to_le32(0);
1789 }
1790 
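/*
 * Program the per-SE/SH always-on CU masks used by RLC load balancing and
 * powergating, based on the CU bitmap and the per-ASIC always-on CU count.
 */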
1791 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1792 {
1793 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1794 	uint32_t pg_always_on_cu_num = 2;
1795 	uint32_t always_on_cu_num;
1796 	uint32_t i, j, k;
1797 	uint32_t mask, cu_bitmap, counter;
1798 
1799 	if (adev->flags & AMD_IS_APU)
1800 		always_on_cu_num = 4;
1801 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1802 		always_on_cu_num = 8;
1803 	else
1804 		always_on_cu_num = 12;
1805 
1806 	mutex_lock(&adev->grbm_idx_mutex);
1807 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1808 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1809 			mask = 1;
1810 			cu_bitmap = 0;
1811 			counter = 0;
1812 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1813 
1814 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1815 				if (cu_info->bitmap[i][j] & mask) {
1816 					if (counter == pg_always_on_cu_num)
1817 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1818 					if (counter < always_on_cu_num)
1819 						cu_bitmap |= mask;
1820 					else
1821 						break;
1822 					counter++;
1823 				}
1824 				mask <<= 1;
1825 			}
1826 
1827 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1828 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1829 		}
1830 	}
1831 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1832 	mutex_unlock(&adev->grbm_idx_mutex);
1833 }
1834 
1835 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1836 {
1837 	uint32_t data;
1838 
1839 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1840 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1841 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1842 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1843 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1844 
1845 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1846 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1847 
1848 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1849 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1850 
1851 	mutex_lock(&adev->grbm_idx_mutex);
1852 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1853 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1854 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1855 
1856 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1857 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1858 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1859 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1860 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1861 
1862 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1863 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1864 	data &= 0x0000FFFF;
1865 	data |= 0x00C00000;
1866 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1867 
1868 	/*
1869 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1870 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1871 	 */
1872 
1873 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1874 	 * but used for RLC_LB_CNTL configuration */
1875 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1876 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1877 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1878 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1879 	mutex_unlock(&adev->grbm_idx_mutex);
1880 
1881 	gfx_v9_0_init_always_on_cu_mask(adev);
1882 }
1883 
1884 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1885 {
1886 	uint32_t data;
1887 
1888 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1889 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1890 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1891 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1892 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1893 
1894 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1895 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1896 
1897 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1898 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1899 
1900 	mutex_lock(&adev->grbm_idx_mutex);
1901 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1902 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1903 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1904 
1905 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1906 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1907 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1908 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1909 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1910 
1911 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1912 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1913 	data &= 0x0000FFFF;
1914 	data |= 0x00C00000;
1915 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1916 
1917 	/*
1918 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1919 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1920 	 */
1921 
1922 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1923 	 * but used for RLC_LB_CNTL configuration */
1924 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1925 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1926 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1927 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1928 	mutex_unlock(&adev->grbm_idx_mutex);
1929 
1930 	gfx_v9_0_init_always_on_cu_mask(adev);
1931 }
1932 
1933 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1934 {
1935 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1936 }
1937 
1938 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1939 {
1940 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1941 		return 5;
1942 	else
1943 		return 4;
1944 }
1945 
1946 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1947 {
1948 	const struct cs_section_def *cs_data;
1949 	int r;
1950 
1951 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1952 
1953 	cs_data = adev->gfx.rlc.cs_data;
1954 
1955 	if (cs_data) {
1956 		/* init clear state block */
1957 		r = amdgpu_gfx_rlc_init_csb(adev);
1958 		if (r)
1959 			return r;
1960 	}
1961 
1962 	if (adev->flags & AMD_IS_APU) {
1963 		/* TODO: double check the cp_table_size for RV */
1964 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1965 		r = amdgpu_gfx_rlc_init_cpt(adev);
1966 		if (r)
1967 			return r;
1968 	}
1969 
1970 	switch (adev->ip_versions[GC_HWIP][0]) {
1971 	case IP_VERSION(9, 2, 2):
1972 	case IP_VERSION(9, 1, 0):
1973 		gfx_v9_0_init_lbpw(adev);
1974 		break;
1975 	case IP_VERSION(9, 4, 0):
1976 		gfx_v9_4_init_lbpw(adev);
1977 		break;
1978 	default:
1979 		break;
1980 	}
1981 
1982 	/* init spm vmid with 0xf */
1983 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1984 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1985 
1986 	return 0;
1987 }
1988 
1989 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1990 {
1991 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1992 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1993 }
1994 
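/*
 * Allocate the EOP (HPD) buffer for the acquired compute queues and copy
 * the MEC firmware into a GTT BO that the CP can fetch from.
 */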
1995 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1996 {
1997 	int r;
1998 	u32 *hpd;
1999 	const __le32 *fw_data;
2000 	unsigned fw_size;
2001 	u32 *fw;
2002 	size_t mec_hpd_size;
2003 
2004 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2005 
2006 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2007 
2008 	/* take ownership of the relevant compute queues */
2009 	amdgpu_gfx_compute_queue_acquire(adev);
2010 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2011 	if (mec_hpd_size) {
2012 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2013 					      AMDGPU_GEM_DOMAIN_VRAM,
2014 					      &adev->gfx.mec.hpd_eop_obj,
2015 					      &adev->gfx.mec.hpd_eop_gpu_addr,
2016 					      (void **)&hpd);
2017 		if (r) {
2018 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2019 			gfx_v9_0_mec_fini(adev);
2020 			return r;
2021 		}
2022 
2023 		memset(hpd, 0, mec_hpd_size);
2024 
2025 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2026 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2027 	}
2028 
2029 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2030 
2031 	fw_data = (const __le32 *)
2032 		(adev->gfx.mec_fw->data +
2033 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2034 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2035 
2036 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2037 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2038 				      &adev->gfx.mec.mec_fw_obj,
2039 				      &adev->gfx.mec.mec_fw_gpu_addr,
2040 				      (void **)&fw);
2041 	if (r) {
2042 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2043 		gfx_v9_0_mec_fini(adev);
2044 		return r;
2045 	}
2046 
2047 	memcpy(fw, fw_data, fw_size);
2048 
2049 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2050 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2051 
2052 	return 0;
2053 }
2054 
2055 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2056 {
2057 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2058 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2059 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2060 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2061 		(SQ_IND_INDEX__FORCE_READ_MASK));
2062 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2063 }
2064 
2065 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2066 			   uint32_t wave, uint32_t thread,
2067 			   uint32_t regno, uint32_t num, uint32_t *out)
2068 {
2069 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2070 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2071 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2072 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2073 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2074 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2075 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2076 	while (num--)
2077 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2078 }
2079 
2080 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2081 {
2082 	/* type 1 wave data */
2083 	dst[(*no_fields)++] = 1;
2084 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2085 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2086 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2087 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2088 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2089 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2090 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2091 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2092 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2093 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2094 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2095 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2096 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2097 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2098 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2099 }
2100 
2101 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2102 				     uint32_t wave, uint32_t start,
2103 				     uint32_t size, uint32_t *dst)
2104 {
2105 	wave_read_regs(
2106 		adev, simd, wave, 0,
2107 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2108 }
2109 
2110 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2111 				     uint32_t wave, uint32_t thread,
2112 				     uint32_t start, uint32_t size,
2113 				     uint32_t *dst)
2114 {
2115 	wave_read_regs(
2116 		adev, simd, wave, thread,
2117 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2118 }
2119 
2120 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2121 				  u32 me, u32 pipe, u32 q, u32 vm)
2122 {
2123 	soc15_grbm_select(adev, me, pipe, q, vm);
2124 }
2125 
2126 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2127 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2128 	.select_se_sh = &gfx_v9_0_select_se_sh,
2129 	.read_wave_data = &gfx_v9_0_read_wave_data,
2130 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2131 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2132 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2133 };
2134 
2135 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2136 	.ras_late_init = amdgpu_gfx_ras_late_init,
2137 	.ras_fini = amdgpu_gfx_ras_fini,
2138 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2139 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2140 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2141 };
2142 
2143 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2144 {
2145 	u32 gb_addr_config;
2146 	int err;
2147 
2148 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2149 
2150 	switch (adev->ip_versions[GC_HWIP][0]) {
2151 	case IP_VERSION(9, 0, 1):
2152 		adev->gfx.config.max_hw_contexts = 8;
2153 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2154 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2155 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2156 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2157 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2158 		break;
2159 	case IP_VERSION(9, 2, 1):
2160 		adev->gfx.config.max_hw_contexts = 8;
2161 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2162 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2163 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2164 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2165 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2166 		DRM_INFO("fix gfx.config for vega12\n");
2167 		break;
2168 	case IP_VERSION(9, 4, 0):
2169 		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2170 		adev->gfx.config.max_hw_contexts = 8;
2171 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2172 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2173 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2174 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2175 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2176 		gb_addr_config &= ~0xf3e777ff;
2177 		gb_addr_config |= 0x22014042;
2178 		/* check vbios table if gpu info is not available */
2179 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2180 		if (err)
2181 			return err;
2182 		break;
2183 	case IP_VERSION(9, 2, 2):
2184 	case IP_VERSION(9, 1, 0):
2185 		adev->gfx.config.max_hw_contexts = 8;
2186 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2187 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2188 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2189 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2190 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2191 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2192 		else
2193 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2194 		break;
2195 	case IP_VERSION(9, 4, 1):
2196 		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2197 		adev->gfx.config.max_hw_contexts = 8;
2198 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2199 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2200 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2201 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2202 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2203 		gb_addr_config &= ~0xf3e777ff;
2204 		gb_addr_config |= 0x22014042;
2205 		break;
2206 	case IP_VERSION(9, 3, 0):
2207 		adev->gfx.config.max_hw_contexts = 8;
2208 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2209 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2210 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2211 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2212 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2213 		gb_addr_config &= ~0xf3e777ff;
2214 		gb_addr_config |= 0x22010042;
2215 		break;
2216 	case IP_VERSION(9, 4, 2):
2217 		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2218 		adev->gfx.config.max_hw_contexts = 8;
2219 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2220 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2221 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2222 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2223 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2224 		gb_addr_config &= ~0xf3e777ff;
2225 		gb_addr_config |= 0x22014042;
2226 		/* check vbios table if gpu info is not available */
2227 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2228 		if (err)
2229 			return err;
2230 		break;
2231 	default:
2232 		BUG();
2233 		break;
2234 	}
2235 
2236 	adev->gfx.config.gb_addr_config = gb_addr_config;
2237 
2238 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2239 			REG_GET_FIELD(
2240 					adev->gfx.config.gb_addr_config,
2241 					GB_ADDR_CONFIG,
2242 					NUM_PIPES);
2243 
2244 	adev->gfx.config.max_tile_pipes =
2245 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2246 
2247 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2248 			REG_GET_FIELD(
2249 					adev->gfx.config.gb_addr_config,
2250 					GB_ADDR_CONFIG,
2251 					NUM_BANKS);
2252 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2253 			REG_GET_FIELD(
2254 					adev->gfx.config.gb_addr_config,
2255 					GB_ADDR_CONFIG,
2256 					MAX_COMPRESSED_FRAGS);
2257 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2258 			REG_GET_FIELD(
2259 					adev->gfx.config.gb_addr_config,
2260 					GB_ADDR_CONFIG,
2261 					NUM_RB_PER_SE);
2262 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2263 			REG_GET_FIELD(
2264 					adev->gfx.config.gb_addr_config,
2265 					GB_ADDR_CONFIG,
2266 					NUM_SHADER_ENGINES);
2267 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2268 			REG_GET_FIELD(
2269 					adev->gfx.config.gb_addr_config,
2270 					GB_ADDR_CONFIG,
2271 					PIPE_INTERLEAVE_SIZE));
2272 
2273 	return 0;
2274 }
2275 
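/*
 * Fill in one compute ring (me/pipe/queue, doorbell, EOP address, name)
 * and initialize it with the matching MEC pipe EOP interrupt and priority.
 */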
2276 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2277 				      int mec, int pipe, int queue)
2278 {
2279 	unsigned irq_type;
2280 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2281 	unsigned int hw_prio;
2282 
2283 	ring = &adev->gfx.compute_ring[ring_id];
2284 
2285 	/* mec0 is me1 */
2286 	ring->me = mec + 1;
2287 	ring->pipe = pipe;
2288 	ring->queue = queue;
2289 
2290 	ring->ring_obj = NULL;
2291 	ring->use_doorbell = true;
2292 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2293 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2294 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2295 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2296 
2297 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2298 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2299 		+ ring->pipe;
2300 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2301 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2302 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2303 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2304 				hw_prio, NULL);
2305 }
2306 
2307 static int gfx_v9_0_sw_init(void *handle)
2308 {
2309 	int i, j, k, r, ring_id;
2310 	struct amdgpu_ring *ring;
2311 	struct amdgpu_kiq *kiq;
2312 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2313 
2314 	switch (adev->ip_versions[GC_HWIP][0]) {
2315 	case IP_VERSION(9, 0, 1):
2316 	case IP_VERSION(9, 2, 1):
2317 	case IP_VERSION(9, 4, 0):
2318 	case IP_VERSION(9, 2, 2):
2319 	case IP_VERSION(9, 1, 0):
2320 	case IP_VERSION(9, 4, 1):
2321 	case IP_VERSION(9, 3, 0):
2322 	case IP_VERSION(9, 4, 2):
2323 		adev->gfx.mec.num_mec = 2;
2324 		break;
2325 	default:
2326 		adev->gfx.mec.num_mec = 1;
2327 		break;
2328 	}
2329 
2330 	adev->gfx.mec.num_pipe_per_mec = 4;
2331 	adev->gfx.mec.num_queue_per_pipe = 8;
2332 
2333 	/* EOP Event */
2334 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2335 	if (r)
2336 		return r;
2337 
2338 	/* Privileged reg */
2339 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2340 			      &adev->gfx.priv_reg_irq);
2341 	if (r)
2342 		return r;
2343 
2344 	/* Privileged inst */
2345 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2346 			      &adev->gfx.priv_inst_irq);
2347 	if (r)
2348 		return r;
2349 
2350 	/* ECC error */
2351 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2352 			      &adev->gfx.cp_ecc_error_irq);
2353 	if (r)
2354 		return r;
2355 
2356 	/* FUE error */
2357 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2358 			      &adev->gfx.cp_ecc_error_irq);
2359 	if (r)
2360 		return r;
2361 
2362 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2363 
2364 	gfx_v9_0_scratch_init(adev);
2365 
2366 	r = gfx_v9_0_init_microcode(adev);
2367 	if (r) {
2368 		DRM_ERROR("Failed to load gfx firmware!\n");
2369 		return r;
2370 	}
2371 
2372 	r = adev->gfx.rlc.funcs->init(adev);
2373 	if (r) {
2374 		DRM_ERROR("Failed to init rlc BOs!\n");
2375 		return r;
2376 	}
2377 
2378 	r = gfx_v9_0_mec_init(adev);
2379 	if (r) {
2380 		DRM_ERROR("Failed to init MEC BOs!\n");
2381 		return r;
2382 	}
2383 
2384 	/* set up the gfx ring */
2385 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2386 		ring = &adev->gfx.gfx_ring[i];
2387 		ring->ring_obj = NULL;
2388 		if (!i)
2389 			sprintf(ring->name, "gfx");
2390 		else
2391 			sprintf(ring->name, "gfx_%d", i);
2392 		ring->use_doorbell = true;
2393 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2394 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2395 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2396 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2397 		if (r)
2398 			return r;
2399 	}
2400 
2401 	/* set up the compute queues - allocate horizontally across pipes */
2402 	ring_id = 0;
2403 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2404 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2405 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2406 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2407 					continue;
2408 
2409 				r = gfx_v9_0_compute_ring_init(adev,
2410 							       ring_id,
2411 							       i, k, j);
2412 				if (r)
2413 					return r;
2414 
2415 				ring_id++;
2416 			}
2417 		}
2418 	}
2419 
2420 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2421 	if (r) {
2422 		DRM_ERROR("Failed to init KIQ BOs!\n");
2423 		return r;
2424 	}
2425 
2426 	kiq = &adev->gfx.kiq;
2427 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2428 	if (r)
2429 		return r;
2430 
2431 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2432 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2433 	if (r)
2434 		return r;
2435 
2436 	adev->gfx.ce_ram_size = 0x8000;
2437 
2438 	r = gfx_v9_0_gpu_early_init(adev);
2439 	if (r)
2440 		return r;
2441 
2442 	return 0;
2443 }
2444 
2445 
2446 static int gfx_v9_0_sw_fini(void *handle)
2447 {
2448 	int i;
2449 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2450 
2451 	if (adev->gfx.ras_funcs &&
2452 	    adev->gfx.ras_funcs->ras_fini)
2453 		adev->gfx.ras_funcs->ras_fini(adev);
2454 
2455 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2456 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2457 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2458 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2459 
2460 	amdgpu_gfx_mqd_sw_fini(adev);
2461 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2462 	amdgpu_gfx_kiq_fini(adev);
2463 
2464 	gfx_v9_0_mec_fini(adev);
2465 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2466 				&adev->gfx.rlc.clear_state_gpu_addr,
2467 				(void **)&adev->gfx.rlc.cs_ptr);
2468 	if (adev->flags & AMD_IS_APU) {
2469 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2470 				&adev->gfx.rlc.cp_table_gpu_addr,
2471 				(void **)&adev->gfx.rlc.cp_table_ptr);
2472 	}
2473 	gfx_v9_0_free_microcode(adev);
2474 
2475 	return 0;
2476 }
2477 
2478 
2479 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2480 {
2481 	/* TODO */
2482 }
2483 
2484 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2485 			   u32 instance)
2486 {
2487 	u32 data;
2488 
2489 	if (instance == 0xffffffff)
2490 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2491 	else
2492 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2493 
2494 	if (se_num == 0xffffffff)
2495 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2496 	else
2497 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2498 
2499 	if (sh_num == 0xffffffff)
2500 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2501 	else
2502 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2503 
2504 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2505 }
2506 
2507 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2508 {
2509 	u32 data, mask;
2510 
2511 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2512 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2513 
2514 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2515 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2516 
2517 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2518 					 adev->gfx.config.max_sh_per_se);
2519 
2520 	return (~data) & mask;
2521 }
2522 
2523 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2524 {
2525 	int i, j;
2526 	u32 data;
2527 	u32 active_rbs = 0;
2528 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2529 					adev->gfx.config.max_sh_per_se;
2530 
2531 	mutex_lock(&adev->grbm_idx_mutex);
2532 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2533 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2534 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2535 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2536 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2537 					       rb_bitmap_width_per_sh);
2538 		}
2539 	}
2540 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2541 	mutex_unlock(&adev->grbm_idx_mutex);
2542 
2543 	adev->gfx.config.backend_enable_mask = active_rbs;
2544 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2545 }
2546 
2547 #define DEFAULT_SH_MEM_BASES	(0x6000)
2548 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2549 {
2550 	int i;
2551 	uint32_t sh_mem_config;
2552 	uint32_t sh_mem_bases;
2553 
2554 	/*
2555 	 * Configure apertures:
2556 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2557 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2558 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2559 	 */
2560 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2561 
2562 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2563 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2564 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2565 
2566 	mutex_lock(&adev->srbm_mutex);
2567 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2568 		soc15_grbm_select(adev, 0, 0, 0, i);
2569 		/* CP and shaders */
2570 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2571 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2572 	}
2573 	soc15_grbm_select(adev, 0, 0, 0, 0);
2574 	mutex_unlock(&adev->srbm_mutex);
2575 
2576 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2577 	   access. These should be enabled by FW for target VMIDs. */
2578 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2579 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2580 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2581 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2582 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2583 	}
2584 }
2585 
2586 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2587 {
2588 	int vmid;
2589 
2590 	/*
2591 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2592 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2593 	 * the driver can enable them for graphics. VMID0 should maintain
2594 	 * access so that HWS firmware can save/restore entries.
2595 	 */
2596 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2597 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2598 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2599 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2600 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2601 	}
2602 }
2603 
2604 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2605 {
2606 	uint32_t tmp;
2607 
2608 	switch (adev->ip_versions[GC_HWIP][0]) {
2609 	case IP_VERSION(9, 4, 1):
2610 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2611 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2612 					DISABLE_BARRIER_WAITCNT, 1);
2613 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2614 		break;
2615 	default:
2616 		break;
2617 	}
2618 }
2619 
2620 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2621 {
2622 	u32 tmp;
2623 	int i;
2624 
2625 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2626 
2627 	gfx_v9_0_tiling_mode_table_init(adev);
2628 
2629 	gfx_v9_0_setup_rb(adev);
2630 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2631 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2632 
2633 	/* XXX SH_MEM regs */
2634 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2635 	mutex_lock(&adev->srbm_mutex);
2636 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2637 		soc15_grbm_select(adev, 0, 0, 0, i);
2638 		/* CP and shaders */
2639 		if (i == 0) {
2640 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2641 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2642 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2643 					    !!adev->gmc.noretry);
2644 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2645 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2646 		} else {
2647 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2648 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2649 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2650 					    !!adev->gmc.noretry);
2651 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2652 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2653 				(adev->gmc.private_aperture_start >> 48));
2654 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2655 				(adev->gmc.shared_aperture_start >> 48));
2656 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2657 		}
2658 	}
2659 	soc15_grbm_select(adev, 0, 0, 0, 0);
2660 
2661 	mutex_unlock(&adev->srbm_mutex);
2662 
2663 	gfx_v9_0_init_compute_vmid(adev);
2664 	gfx_v9_0_init_gds_vmid(adev);
2665 	gfx_v9_0_init_sq_config(adev);
2666 }
2667 
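/*
 * Wait for the RLC serdes CU masters on every SE/SH, and then the non-CU
 * masters, to go idle (or time out).
 */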
2668 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2669 {
2670 	u32 i, j, k;
2671 	u32 mask;
2672 
2673 	mutex_lock(&adev->grbm_idx_mutex);
2674 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2675 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2676 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2677 			for (k = 0; k < adev->usec_timeout; k++) {
2678 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2679 					break;
2680 				udelay(1);
2681 			}
2682 			if (k == adev->usec_timeout) {
2683 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2684 						      0xffffffff, 0xffffffff);
2685 				mutex_unlock(&adev->grbm_idx_mutex);
2686 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2687 					 i, j);
2688 				return;
2689 			}
2690 		}
2691 	}
2692 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2693 	mutex_unlock(&adev->grbm_idx_mutex);
2694 
2695 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2696 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2697 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2698 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2699 	for (k = 0; k < adev->usec_timeout; k++) {
2700 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2701 			break;
2702 		udelay(1);
2703 	}
2704 }
2705 
2706 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2707 					       bool enable)
2708 {
2709 	u32 tmp;
2710 
2711 	/* These interrupts should be enabled to drive DS clock */
2712 
2713 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2714 
2715 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2717 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2718 	if (adev->gfx.num_gfx_rings)
2719 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2720 
2721 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2722 }
2723 
2724 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2725 {
2726 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2727 	/* csib */
2728 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2729 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2730 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2731 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2732 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2733 			adev->gfx.rlc.clear_state_size);
2734 }
2735 
2736 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2737 				int indirect_offset,
2738 				int list_size,
2739 				int *unique_indirect_regs,
2740 				int unique_indirect_reg_count,
2741 				int *indirect_start_offsets,
2742 				int *indirect_start_offsets_count,
2743 				int max_start_offsets_count)
2744 {
2745 	int idx;
2746 
2747 	for (; indirect_offset < list_size; indirect_offset++) {
2748 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2749 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2750 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2751 
2752 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2753 			indirect_offset += 2;
2754 
2755 			/* look for the matching index */
2756 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2757 				if (unique_indirect_regs[idx] ==
2758 					register_list_format[indirect_offset] ||
2759 					!unique_indirect_regs[idx])
2760 					break;
2761 			}
2762 
2763 			BUG_ON(idx >= unique_indirect_reg_count);
2764 
2765 			if (!unique_indirect_regs[idx])
2766 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2767 
2768 			indirect_offset++;
2769 		}
2770 	}
2771 }
2772 
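/*
 * Program the RLC save/restore machinery: write the register restore table
 * into ARAM, load the direct and indirect register list format into GPM
 * scratch, and set up the indirect index/data register pairs.
 */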
2773 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2774 {
2775 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2776 	int unique_indirect_reg_count = 0;
2777 
2778 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2779 	int indirect_start_offsets_count = 0;
2780 
2781 	int list_size = 0;
2782 	int i = 0, j = 0;
2783 	u32 tmp = 0;
2784 
2785 	u32 *register_list_format =
2786 		kmemdup(adev->gfx.rlc.register_list_format,
2787 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2788 	if (!register_list_format)
2789 		return -ENOMEM;
2790 
2791 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2792 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2793 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2794 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2795 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2796 				    unique_indirect_regs,
2797 				    unique_indirect_reg_count,
2798 				    indirect_start_offsets,
2799 				    &indirect_start_offsets_count,
2800 				    ARRAY_SIZE(indirect_start_offsets));
2801 
2802 	/* enable auto inc in case it is disabled */
2803 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2804 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2805 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2806 
2807 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2808 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2809 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2810 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2811 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2812 			adev->gfx.rlc.register_restore[i]);
2813 
2814 	/* load indirect register */
2815 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2816 		adev->gfx.rlc.reg_list_format_start);
2817 
2818 	/* direct register portion */
2819 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2820 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2821 			register_list_format[i]);
2822 
2823 	/* indirect register portion */
2824 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2825 		if (register_list_format[i] == 0xFFFFFFFF) {
2826 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2827 			continue;
2828 		}
2829 
2830 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2831 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2832 
2833 		for (j = 0; j < unique_indirect_reg_count; j++) {
2834 			if (register_list_format[i] == unique_indirect_regs[j]) {
2835 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2836 				break;
2837 			}
2838 		}
2839 
2840 		BUG_ON(j >= unique_indirect_reg_count);
2841 
2842 		i++;
2843 	}
2844 
2845 	/* set save/restore list size */
2846 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2847 	list_size = list_size >> 1;
2848 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2849 		adev->gfx.rlc.reg_restore_list_size);
2850 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2851 
2852 	/* write the starting offsets to RLC scratch ram */
2853 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2854 		adev->gfx.rlc.starting_offsets_start);
2855 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2856 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2857 		       indirect_start_offsets[i]);
2858 
2859 	/* load unique indirect regs*/
2860 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2861 		if (unique_indirect_regs[i] != 0) {
2862 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2863 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2864 			       unique_indirect_regs[i] & 0x3FFFF);
2865 
2866 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2867 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2868 			       unique_indirect_regs[i] >> 20);
2869 		}
2870 	}
2871 
2872 	kfree(register_list_format);
2873 	return 0;
2874 }
2875 
2876 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2877 {
2878 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2879 }
2880 
2881 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2882 					     bool enable)
2883 {
2884 	uint32_t data = 0;
2885 	uint32_t default_data = 0;
2886 
2887 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2888 	if (enable) {
2889 		/* enable GFXIP control over CGPG */
2890 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2891 		if (default_data != data)
2892 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2893 
2894 		/* update status */
2895 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2896 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2897 		if (default_data != data)
2898 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2899 	} else {
2900 		/* restore GFXIP control over CGPG */
2901 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2902 		if (default_data != data)
2903 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2904 	}
2905 }
2906 
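/*
 * Program the RLC power-gating timing parameters (idle poll count, PG
 * delays, SERDES command delay, CGCG-before-CGPG delay and the GRBM
 * register-save idle threshold) when any GFX power-gating mode is enabled.
 */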
2907 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2908 {
2909 	uint32_t data = 0;
2910 
2911 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2912 			      AMD_PG_SUPPORT_GFX_SMG |
2913 			      AMD_PG_SUPPORT_GFX_DMG)) {
2914 		/* init IDLE_POLL_COUNT = 60 */
2915 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2916 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2917 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2918 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2919 
2920 		/* init RLC PG Delay */
2921 		data = 0;
2922 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2923 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2924 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2925 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2926 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2927 
2928 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2929 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2930 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2931 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2932 
2933 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2934 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2935 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2936 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2937 
2938 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2939 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2940 
2941 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2942 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2943 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2944 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2945 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2946 	}
2947 }
2948 
2949 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2950 						bool enable)
2951 {
2952 	uint32_t data = 0;
2953 	uint32_t default_data = 0;
2954 
2955 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2956 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2957 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2958 			     enable ? 1 : 0);
2959 	if (default_data != data)
2960 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2961 }
2962 
2963 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2964 						bool enable)
2965 {
2966 	uint32_t data = 0;
2967 	uint32_t default_data = 0;
2968 
2969 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2970 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2971 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2972 			     enable ? 1 : 0);
2973 	if (default_data != data)
2974 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2975 }
2976 
2977 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2978 					bool enable)
2979 {
2980 	uint32_t data = 0;
2981 	uint32_t default_data = 0;
2982 
2983 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2984 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2985 			     CP_PG_DISABLE,
2986 			     enable ? 0 : 1);
2987 	if (default_data != data)
2988 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2989 }
2990 
2991 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2992 						bool enable)
2993 {
2994 	uint32_t data, default_data;
2995 
2996 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2997 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2998 			     GFX_POWER_GATING_ENABLE,
2999 			     enable ? 1 : 0);
3000 	if (default_data != data)
3001 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3002 }
3003 
3004 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3005 						bool enable)
3006 {
3007 	uint32_t data, default_data;
3008 
3009 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3010 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3011 			     GFX_PIPELINE_PG_ENABLE,
3012 			     enable ? 1 : 0);
3013 	if (default_data != data)
3014 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3015 
3016 	if (!enable)
3017 		/* read any GFX register to wake up GFX */
3018 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3019 }
3020 
3021 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3022 						       bool enable)
3023 {
3024 	uint32_t data, default_data;
3025 
3026 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3027 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3028 			     STATIC_PER_CU_PG_ENABLE,
3029 			     enable ? 1 : 0);
3030 	if (default_data != data)
3031 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3032 }
3033 
3034 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3035 						bool enable)
3036 {
3037 	uint32_t data, default_data;
3038 
3039 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3040 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3041 			     DYN_PER_CU_PG_ENABLE,
3042 			     enable ? 1 : 0);
3043 	if (default_data != data)
3044 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3045 }
3046 
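/*
 * Power-gating init: set up the clear-state buffer, the RLC save/restore
 * list (when available) and, if any PG feature is enabled, the RLC jump
 * table address and the GFX power-gating timing parameters.
 */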
3047 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3048 {
3049 	gfx_v9_0_init_csb(adev);
3050 
3051 	/*
3052 	 * The RLC save/restore list is supported since RLC v2_1
3053 	 * and is required by the gfxoff feature.
3054 	 */
3055 	if (adev->gfx.rlc.is_rlc_v2_1) {
3056 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3057 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3058 			gfx_v9_1_init_rlc_save_restore_list(adev);
3059 		gfx_v9_0_enable_save_restore_machine(adev);
3060 	}
3061 
3062 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3063 			      AMD_PG_SUPPORT_GFX_SMG |
3064 			      AMD_PG_SUPPORT_GFX_DMG |
3065 			      AMD_PG_SUPPORT_CP |
3066 			      AMD_PG_SUPPORT_GDS |
3067 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3068 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
3069 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
3070 		gfx_v9_0_init_gfx_power_gating(adev);
3071 	}
3072 }
3073 
3074 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3075 {
3076 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3077 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3078 	gfx_v9_0_wait_for_rlc_serdes(adev);
3079 }
3080 
3081 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3082 {
3083 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3084 	udelay(50);
3085 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3086 	udelay(50);
3087 }
3088 
3089 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3090 {
3091 #ifdef AMDGPU_RLC_DEBUG_RETRY
3092 	u32 rlc_ucode_ver;
3093 #endif
3094 
3095 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3096 	udelay(50);
3097 
3098 	/* on APUs (e.g. carrizo) the CP interrupt is enabled only after the CP is initialized */
3099 	if (!(adev->flags & AMD_IS_APU)) {
3100 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3101 		udelay(50);
3102 	}
3103 
3104 #ifdef AMDGPU_RLC_DEBUG_RETRY
3105 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3106 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3107 	if (rlc_ucode_ver == 0x108) {
3108 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3109 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3110 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3111 		 * default is 0x9C4 to create a 100us interval */
3112 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3113 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3114 		 * to disable the page fault retry interrupts, default is
3115 		 * 0x100 (256) */
3116 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3117 	}
3118 #endif
3119 }
3120 
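/* Legacy (non-PSP) RLC microcode load through RLC_GPM_UCODE_ADDR/DATA. */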
3121 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3122 {
3123 	const struct rlc_firmware_header_v2_0 *hdr;
3124 	const __le32 *fw_data;
3125 	unsigned i, fw_size;
3126 
3127 	if (!adev->gfx.rlc_fw)
3128 		return -EINVAL;
3129 
3130 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3131 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3132 
3133 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3134 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3135 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3136 
3137 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3138 			RLCG_UCODE_LOADING_START_ADDRESS);
3139 	for (i = 0; i < fw_size; i++)
3140 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3141 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3142 
3143 	return 0;
3144 }
3145 
3146 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3147 {
3148 	int r;
3149 
3150 	if (amdgpu_sriov_vf(adev)) {
3151 		gfx_v9_0_init_csb(adev);
3152 		return 0;
3153 	}
3154 
3155 	adev->gfx.rlc.funcs->stop(adev);
3156 
3157 	/* disable CG */
3158 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3159 
3160 	gfx_v9_0_init_pg(adev);
3161 
3162 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3163 		/* legacy rlc firmware loading */
3164 		r = gfx_v9_0_rlc_load_microcode(adev);
3165 		if (r)
3166 			return r;
3167 	}
3168 
3169 	switch (adev->ip_versions[GC_HWIP][0]) {
3170 	case IP_VERSION(9, 2, 2):
3171 	case IP_VERSION(9, 1, 0):
3172 		if (amdgpu_lbpw == 0)
3173 			gfx_v9_0_enable_lbpw(adev, false);
3174 		else
3175 			gfx_v9_0_enable_lbpw(adev, true);
3176 		break;
3177 	case IP_VERSION(9, 4, 0):
3178 		if (amdgpu_lbpw > 0)
3179 			gfx_v9_0_enable_lbpw(adev, true);
3180 		else
3181 			gfx_v9_0_enable_lbpw(adev, false);
3182 		break;
3183 	default:
3184 		break;
3185 	}
3186 
3187 	adev->gfx.rlc.funcs->start(adev);
3188 
3189 	return 0;
3190 }
3191 
3192 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3193 {
3194 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3195 
3196 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3197 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3198 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3199 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3200 	udelay(50);
3201 }
3202 
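/*
 * Legacy (non-PSP) load of the PFP, CE and ME microcode through the
 * corresponding UCODE_ADDR/UCODE_DATA (or ME_RAM_WADDR/DATA) register pairs.
 */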
3203 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3204 {
3205 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3206 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3207 	const struct gfx_firmware_header_v1_0 *me_hdr;
3208 	const __le32 *fw_data;
3209 	unsigned i, fw_size;
3210 
3211 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3212 		return -EINVAL;
3213 
3214 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3215 		adev->gfx.pfp_fw->data;
3216 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3217 		adev->gfx.ce_fw->data;
3218 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3219 		adev->gfx.me_fw->data;
3220 
3221 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3222 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3223 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3224 
3225 	gfx_v9_0_cp_gfx_enable(adev, false);
3226 
3227 	/* PFP */
3228 	fw_data = (const __le32 *)
3229 		(adev->gfx.pfp_fw->data +
3230 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3231 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3232 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3233 	for (i = 0; i < fw_size; i++)
3234 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3235 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3236 
3237 	/* CE */
3238 	fw_data = (const __le32 *)
3239 		(adev->gfx.ce_fw->data +
3240 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3241 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3242 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3243 	for (i = 0; i < fw_size; i++)
3244 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3245 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3246 
3247 	/* ME */
3248 	fw_data = (const __le32 *)
3249 		(adev->gfx.me_fw->data +
3250 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3251 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3252 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3253 	for (i = 0; i < fw_size; i++)
3254 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3255 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3256 
3257 	return 0;
3258 }
3259 
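/*
 * Initialize the CP and submit the clear-state sequence on the gfx ring:
 * PREAMBLE/CONTEXT_CONTROL packets, the golden context register values from
 * gfx9_cs_data, a CLEAR_STATE packet and the CE partition bases.
 */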
3260 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3261 {
3262 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3263 	const struct cs_section_def *sect = NULL;
3264 	const struct cs_extent_def *ext = NULL;
3265 	int r, i, tmp;
3266 
3267 	/* init the CP */
3268 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3269 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3270 
3271 	gfx_v9_0_cp_gfx_enable(adev, true);
3272 
3273 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3274 	if (r) {
3275 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3276 		return r;
3277 	}
3278 
3279 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3280 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3281 
3282 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3283 	amdgpu_ring_write(ring, 0x80000000);
3284 	amdgpu_ring_write(ring, 0x80000000);
3285 
3286 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3287 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3288 			if (sect->id == SECT_CONTEXT) {
3289 				amdgpu_ring_write(ring,
3290 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3291 					       ext->reg_count));
3292 				amdgpu_ring_write(ring,
3293 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3294 				for (i = 0; i < ext->reg_count; i++)
3295 					amdgpu_ring_write(ring, ext->extent[i]);
3296 			}
3297 		}
3298 	}
3299 
3300 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3301 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3302 
3303 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3304 	amdgpu_ring_write(ring, 0);
3305 
3306 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3307 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3308 	amdgpu_ring_write(ring, 0x8000);
3309 	amdgpu_ring_write(ring, 0x8000);
3310 
3311 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3312 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3313 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3314 	amdgpu_ring_write(ring, tmp);
3315 	amdgpu_ring_write(ring, 0);
3316 
3317 	amdgpu_ring_commit(ring);
3318 
3319 	return 0;
3320 }
3321 
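/*
 * Program ring buffer 0 (size, rptr/wptr write-back addresses, base and
 * doorbell range) and kick off the gfx ring via gfx_v9_0_cp_gfx_start().
 */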
3322 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3323 {
3324 	struct amdgpu_ring *ring;
3325 	u32 tmp;
3326 	u32 rb_bufsz;
3327 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3328 
3329 	/* Set the write pointer delay */
3330 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3331 
3332 	/* set the RB to use vmid 0 */
3333 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3334 
3335 	/* Set ring buffer size */
3336 	ring = &adev->gfx.gfx_ring[0];
3337 	rb_bufsz = order_base_2(ring->ring_size / 8);
3338 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3339 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3340 #ifdef __BIG_ENDIAN
3341 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3342 #endif
3343 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3344 
3345 	/* Initialize the ring buffer's write pointers */
3346 	ring->wptr = 0;
3347 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3348 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3349 
3350 	/* set the wb address whether it's enabled or not */
3351 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3352 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3353 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3354 
3355 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3356 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3357 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3358 
3359 	mdelay(1);
3360 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3361 
3362 	rb_addr = ring->gpu_addr >> 8;
3363 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3364 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3365 
3366 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3367 	if (ring->use_doorbell) {
3368 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3369 				    DOORBELL_OFFSET, ring->doorbell_index);
3370 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3371 				    DOORBELL_EN, 1);
3372 	} else {
3373 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3374 	}
3375 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3376 
3377 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3378 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3379 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3380 
3381 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3382 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3383 
3384 
3385 	/* start the ring */
3386 	gfx_v9_0_cp_gfx_start(adev);
3387 	ring->sched.ready = true;
3388 
3389 	return 0;
3390 }
3391 
3392 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3393 {
3394 	if (enable) {
3395 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3396 	} else {
3397 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3398 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3399 		adev->gfx.kiq.ring.sched.ready = false;
3400 	}
3401 	udelay(50);
3402 }
3403 
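/*
 * Legacy (non-PSP) MEC microcode load: point the CPC instruction cache at
 * the firmware BO and copy the MEC1 jump table through UCODE_ADDR/DATA.
 */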
3404 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3405 {
3406 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3407 	const __le32 *fw_data;
3408 	unsigned i;
3409 	u32 tmp;
3410 
3411 	if (!adev->gfx.mec_fw)
3412 		return -EINVAL;
3413 
3414 	gfx_v9_0_cp_compute_enable(adev, false);
3415 
3416 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3417 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3418 
3419 	fw_data = (const __le32 *)
3420 		(adev->gfx.mec_fw->data +
3421 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3422 	tmp = 0;
3423 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3424 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3425 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3426 
3427 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3428 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3429 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3430 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3431 
3432 	/* MEC1 */
3433 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3434 			 mec_hdr->jt_offset);
3435 	for (i = 0; i < mec_hdr->jt_size; i++)
3436 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3437 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3438 
3439 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3440 			adev->gfx.mec_fw_version);
3441 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3442 
3443 	return 0;
3444 }
3445 
3446 /* KIQ functions */
3447 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3448 {
3449 	uint32_t tmp;
3450 	struct amdgpu_device *adev = ring->adev;
3451 
3452 	/* tell the RLC which queue is the KIQ */
3453 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3454 	tmp &= 0xffffff00;
3455 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3456 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3457 	tmp |= 0x80;
3458 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3459 }
3460 
3461 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3462 {
3463 	struct amdgpu_device *adev = ring->adev;
3464 
3465 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3466 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3467 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3468 			mqd->cp_hqd_queue_priority =
3469 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3470 		}
3471 	}
3472 }
3473 
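/*
 * Fill the v9 MQD for a compute/KIQ queue from the ring state: EOP buffer,
 * MQD and HQD base addresses, queue size, rptr/wptr write-back addresses,
 * doorbell setup and queue priority.
 */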
3474 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3475 {
3476 	struct amdgpu_device *adev = ring->adev;
3477 	struct v9_mqd *mqd = ring->mqd_ptr;
3478 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3479 	uint32_t tmp;
3480 
3481 	mqd->header = 0xC0310800;
3482 	mqd->compute_pipelinestat_enable = 0x00000001;
3483 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3484 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3485 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3486 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3487 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3488 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3489 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3490 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3491 	mqd->compute_misc_reserved = 0x00000003;
3492 
3493 	mqd->dynamic_cu_mask_addr_lo =
3494 		lower_32_bits(ring->mqd_gpu_addr
3495 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3496 	mqd->dynamic_cu_mask_addr_hi =
3497 		upper_32_bits(ring->mqd_gpu_addr
3498 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3499 
3500 	eop_base_addr = ring->eop_gpu_addr >> 8;
3501 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3502 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3503 
3504 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3505 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3506 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3507 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3508 
3509 	mqd->cp_hqd_eop_control = tmp;
3510 
3511 	/* enable doorbell? */
3512 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3513 
3514 	if (ring->use_doorbell) {
3515 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3516 				    DOORBELL_OFFSET, ring->doorbell_index);
3517 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518 				    DOORBELL_EN, 1);
3519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520 				    DOORBELL_SOURCE, 0);
3521 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522 				    DOORBELL_HIT, 0);
3523 	} else {
3524 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525 					 DOORBELL_EN, 0);
3526 	}
3527 
3528 	mqd->cp_hqd_pq_doorbell_control = tmp;
3529 
3530 	/* disable the queue if it's active */
3531 	ring->wptr = 0;
3532 	mqd->cp_hqd_dequeue_request = 0;
3533 	mqd->cp_hqd_pq_rptr = 0;
3534 	mqd->cp_hqd_pq_wptr_lo = 0;
3535 	mqd->cp_hqd_pq_wptr_hi = 0;
3536 
3537 	/* set the pointer to the MQD */
3538 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3539 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3540 
3541 	/* set MQD vmid to 0 */
3542 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3543 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3544 	mqd->cp_mqd_control = tmp;
3545 
3546 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3547 	hqd_gpu_addr = ring->gpu_addr >> 8;
3548 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3549 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3550 
3551 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3552 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3553 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3554 			    (order_base_2(ring->ring_size / 4) - 1));
3555 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3556 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3557 #ifdef __BIG_ENDIAN
3558 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3559 #endif
3560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3561 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3562 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3563 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3564 	mqd->cp_hqd_pq_control = tmp;
3565 
3566 	/* set the wb address whether it's enabled or not */
3567 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3568 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3569 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3570 		upper_32_bits(wb_gpu_addr) & 0xffff;
3571 
3572 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3573 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3574 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3575 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3576 
3577 	tmp = 0;
3578 	/* enable the doorbell if requested */
3579 	if (ring->use_doorbell) {
3580 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3581 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3582 				DOORBELL_OFFSET, ring->doorbell_index);
3583 
3584 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3585 					 DOORBELL_EN, 1);
3586 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3587 					 DOORBELL_SOURCE, 0);
3588 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3589 					 DOORBELL_HIT, 0);
3590 	}
3591 
3592 	mqd->cp_hqd_pq_doorbell_control = tmp;
3593 
3594 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3595 	ring->wptr = 0;
3596 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3597 
3598 	/* set the vmid for the queue */
3599 	mqd->cp_hqd_vmid = 0;
3600 
3601 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3602 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3603 	mqd->cp_hqd_persistent_state = tmp;
3604 
3605 	/* set MIN_IB_AVAIL_SIZE */
3606 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3607 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3608 	mqd->cp_hqd_ib_control = tmp;
3609 
3610 	/* set static priority for a queue/ring */
3611 	gfx_v9_0_mqd_set_priority(ring, mqd);
3612 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3613 
3614 	/* the map_queues packet doesn't need to activate the queue,
3615 	 * so only the KIQ needs to set this field.
3616 	 */
3617 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3618 		mqd->cp_hqd_active = 1;
3619 
3620 	return 0;
3621 }
3622 
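/*
 * Program the HQD registers for the KIQ directly from its MQD; the caller
 * selects the queue via soc15_grbm_select() while holding srbm_mutex.
 */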
3623 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3624 {
3625 	struct amdgpu_device *adev = ring->adev;
3626 	struct v9_mqd *mqd = ring->mqd_ptr;
3627 	int j;
3628 
3629 	/* disable wptr polling */
3630 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3631 
3632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3633 	       mqd->cp_hqd_eop_base_addr_lo);
3634 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3635 	       mqd->cp_hqd_eop_base_addr_hi);
3636 
3637 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3638 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3639 	       mqd->cp_hqd_eop_control);
3640 
3641 	/* enable doorbell? */
3642 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3643 	       mqd->cp_hqd_pq_doorbell_control);
3644 
3645 	/* disable the queue if it's active */
3646 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3647 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3648 		for (j = 0; j < adev->usec_timeout; j++) {
3649 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3650 				break;
3651 			udelay(1);
3652 		}
3653 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3654 		       mqd->cp_hqd_dequeue_request);
3655 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3656 		       mqd->cp_hqd_pq_rptr);
3657 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3658 		       mqd->cp_hqd_pq_wptr_lo);
3659 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3660 		       mqd->cp_hqd_pq_wptr_hi);
3661 	}
3662 
3663 	/* set the pointer to the MQD */
3664 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3665 	       mqd->cp_mqd_base_addr_lo);
3666 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3667 	       mqd->cp_mqd_base_addr_hi);
3668 
3669 	/* set MQD vmid to 0 */
3670 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3671 	       mqd->cp_mqd_control);
3672 
3673 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3674 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3675 	       mqd->cp_hqd_pq_base_lo);
3676 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3677 	       mqd->cp_hqd_pq_base_hi);
3678 
3679 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3680 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3681 	       mqd->cp_hqd_pq_control);
3682 
3683 	/* set the wb address whether it's enabled or not */
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3685 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3686 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3687 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3688 
3689 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3690 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3691 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3692 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3693 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3694 
3695 	/* enable the doorbell if requested */
3696 	if (ring->use_doorbell) {
3697 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3698 					(adev->doorbell_index.kiq * 2) << 2);
3699 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3700 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3701 		 * around this issue. This change has to be kept in line with the
3702 		 * firmware update.
3703 		 */
3704 		if (check_if_enlarge_doorbell_range(adev))
3705 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3706 					(adev->doorbell.size - 4));
3707 		else
3708 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3709 					(adev->doorbell_index.userqueue_end * 2) << 2);
3710 	}
3711 
3712 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3713 	       mqd->cp_hqd_pq_doorbell_control);
3714 
3715 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3716 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3717 	       mqd->cp_hqd_pq_wptr_lo);
3718 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3719 	       mqd->cp_hqd_pq_wptr_hi);
3720 
3721 	/* set the vmid for the queue */
3722 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3723 
3724 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3725 	       mqd->cp_hqd_persistent_state);
3726 
3727 	/* activate the queue */
3728 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3729 	       mqd->cp_hqd_active);
3730 
3731 	if (ring->use_doorbell)
3732 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3733 
3734 	return 0;
3735 }
3736 
3737 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3738 {
3739 	struct amdgpu_device *adev = ring->adev;
3740 	int j;
3741 
3742 	/* disable the queue if it's active */
3743 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3744 
3745 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3746 
3747 		for (j = 0; j < adev->usec_timeout; j++) {
3748 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3749 				break;
3750 			udelay(1);
3751 		}
3752 
3753 		if (j == adev->usec_timeout) {
3754 			DRM_DEBUG("KIQ dequeue request failed.\n");
3755 
3756 			/* Manual disable if dequeue request times out */
3757 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3758 		}
3759 
3760 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3761 		      0);
3762 	}
3763 
3764 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3765 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3766 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3767 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3768 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3769 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3770 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3771 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3772 
3773 	return 0;
3774 }
3775 
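/*
 * Initialize the KIQ: on a fresh start the MQD is built and backed up; on a
 * GPU reset the backed-up MQD is restored and only the HQD registers are
 * reprogrammed.
 */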
3776 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3777 {
3778 	struct amdgpu_device *adev = ring->adev;
3779 	struct v9_mqd *mqd = ring->mqd_ptr;
3780 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3781 	struct v9_mqd *tmp_mqd;
3782 
3783 	gfx_v9_0_kiq_setting(ring);
3784 
3785 	/* The GPU could be in a bad state during probe and the driver may trigger
3786 	 * a reset after loading the SMU; in that case the MQD has not been
3787 	 * initialized and the driver needs to re-init it.
3788 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3789 	 */
3790 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3791 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3792 		/* for GPU_RESET case , reset MQD to a clean status */
3793 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3794 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3795 
3796 		/* reset ring buffer */
3797 		ring->wptr = 0;
3798 		amdgpu_ring_clear_ring(ring);
3799 
3800 		mutex_lock(&adev->srbm_mutex);
3801 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3802 		gfx_v9_0_kiq_init_register(ring);
3803 		soc15_grbm_select(adev, 0, 0, 0, 0);
3804 		mutex_unlock(&adev->srbm_mutex);
3805 	} else {
3806 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3807 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3808 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3809 		mutex_lock(&adev->srbm_mutex);
3810 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3811 		gfx_v9_0_mqd_init(ring);
3812 		gfx_v9_0_kiq_init_register(ring);
3813 		soc15_grbm_select(adev, 0, 0, 0, 0);
3814 		mutex_unlock(&adev->srbm_mutex);
3815 
3816 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3817 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3818 	}
3819 
3820 	return 0;
3821 }
3822 
3823 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3824 {
3825 	struct amdgpu_device *adev = ring->adev;
3826 	struct v9_mqd *mqd = ring->mqd_ptr;
3827 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3828 	struct v9_mqd *tmp_mqd;
3829 
3830 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3831 	 * mqd->cp_hqd_pq_control has not been initialized before.
3832 	 */
3833 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3834 
3835 	if (!tmp_mqd->cp_hqd_pq_control ||
3836 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3837 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3838 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3839 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3840 		mutex_lock(&adev->srbm_mutex);
3841 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3842 		gfx_v9_0_mqd_init(ring);
3843 		soc15_grbm_select(adev, 0, 0, 0, 0);
3844 		mutex_unlock(&adev->srbm_mutex);
3845 
3846 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3847 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3848 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3849 		/* reset MQD to a clean status */
3850 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3851 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3852 
3853 		/* reset ring buffer */
3854 		ring->wptr = 0;
3855 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3856 		amdgpu_ring_clear_ring(ring);
3857 	} else {
3858 		amdgpu_ring_clear_ring(ring);
3859 	}
3860 
3861 	return 0;
3862 }
3863 
3864 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3865 {
3866 	struct amdgpu_ring *ring;
3867 	int r;
3868 
3869 	ring = &adev->gfx.kiq.ring;
3870 
3871 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3872 	if (unlikely(r != 0))
3873 		return r;
3874 
3875 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3876 	if (unlikely(r != 0))
3877 		return r;
3878 
3879 	gfx_v9_0_kiq_init_queue(ring);
3880 	amdgpu_bo_kunmap(ring->mqd_obj);
3881 	ring->mqd_ptr = NULL;
3882 	amdgpu_bo_unreserve(ring->mqd_obj);
3883 	ring->sched.ready = true;
3884 	return 0;
3885 }
3886 
3887 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3888 {
3889 	struct amdgpu_ring *ring = NULL;
3890 	int r = 0, i;
3891 
3892 	gfx_v9_0_cp_compute_enable(adev, true);
3893 
3894 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3895 		ring = &adev->gfx.compute_ring[i];
3896 
3897 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3898 		if (unlikely(r != 0))
3899 			goto done;
3900 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3901 		if (!r) {
3902 			r = gfx_v9_0_kcq_init_queue(ring);
3903 			amdgpu_bo_kunmap(ring->mqd_obj);
3904 			ring->mqd_ptr = NULL;
3905 		}
3906 		amdgpu_bo_unreserve(ring->mqd_obj);
3907 		if (r)
3908 			goto done;
3909 	}
3910 
3911 	r = amdgpu_gfx_enable_kcq(adev);
3912 done:
3913 	return r;
3914 }
3915 
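/*
 * Bring up the command processor: load the CP microcode when PSP loading is
 * not used, then resume the KIQ, the gfx ring and the compute queues, and
 * finally ring-test them.
 */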
3916 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3917 {
3918 	int r, i;
3919 	struct amdgpu_ring *ring;
3920 
3921 	if (!(adev->flags & AMD_IS_APU))
3922 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3923 
3924 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3925 		if (adev->gfx.num_gfx_rings) {
3926 			/* legacy firmware loading */
3927 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3928 			if (r)
3929 				return r;
3930 		}
3931 
3932 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3933 		if (r)
3934 			return r;
3935 	}
3936 
3937 	r = gfx_v9_0_kiq_resume(adev);
3938 	if (r)
3939 		return r;
3940 
3941 	if (adev->gfx.num_gfx_rings) {
3942 		r = gfx_v9_0_cp_gfx_resume(adev);
3943 		if (r)
3944 			return r;
3945 	}
3946 
3947 	r = gfx_v9_0_kcq_resume(adev);
3948 	if (r)
3949 		return r;
3950 
3951 	if (adev->gfx.num_gfx_rings) {
3952 		ring = &adev->gfx.gfx_ring[0];
3953 		r = amdgpu_ring_test_helper(ring);
3954 		if (r)
3955 			return r;
3956 	}
3957 
3958 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3959 		ring = &adev->gfx.compute_ring[i];
3960 		amdgpu_ring_test_helper(ring);
3961 	}
3962 
3963 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3964 
3965 	return 0;
3966 }
3967 
3968 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3969 {
3970 	u32 tmp;
3971 
3972 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3973 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3974 		return;
3975 
3976 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3977 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3978 				adev->df.hash_status.hash_64k);
3979 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3980 				adev->df.hash_status.hash_2m);
3981 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3982 				adev->df.hash_status.hash_1g);
3983 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3984 }
3985 
3986 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3987 {
3988 	if (adev->gfx.num_gfx_rings)
3989 		gfx_v9_0_cp_gfx_enable(adev, enable);
3990 	gfx_v9_0_cp_compute_enable(adev, enable);
3991 }
3992 
3993 static int gfx_v9_0_hw_init(void *handle)
3994 {
3995 	int r;
3996 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3997 
3998 	if (!amdgpu_sriov_vf(adev))
3999 		gfx_v9_0_init_golden_registers(adev);
4000 
4001 	gfx_v9_0_constants_init(adev);
4002 
4003 	gfx_v9_0_init_tcp_config(adev);
4004 
4005 	r = adev->gfx.rlc.funcs->resume(adev);
4006 	if (r)
4007 		return r;
4008 
4009 	r = gfx_v9_0_cp_resume(adev);
4010 	if (r)
4011 		return r;
4012 
4013 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4014 		gfx_v9_4_2_set_power_brake_sequence(adev);
4015 
4016 	return r;
4017 }
4018 
4019 static int gfx_v9_0_hw_fini(void *handle)
4020 {
4021 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4022 
4023 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4024 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4025 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4026 
4027 	/* DF freeze and KCQ disable will fail if a RAS fatal error has been triggered */
4028 	if (!amdgpu_ras_intr_triggered())
4029 		/* disable the KCQs to keep the CPC from touching memory that is no longer valid */
4030 		amdgpu_gfx_disable_kcq(adev);
4031 
4032 	if (amdgpu_sriov_vf(adev)) {
4033 		gfx_v9_0_cp_gfx_enable(adev, false);
4034 		/* Polling must be disabled for SR-IOV when the hw is finished;
4035 		 * otherwise the CPC engine may keep fetching a WB address that is
4036 		 * no longer valid after the sw has finished, triggering a DMAR read
4037 		 * error on the hypervisor side.
4038 		 */
4039 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4040 		return 0;
4041 	}
4042 
4043 	/* Use the deinitialization sequence from CAIL when unbinding the device
4044 	 * from the driver, otherwise the KIQ hangs when binding it back.
4045 	 */
4046 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4047 		mutex_lock(&adev->srbm_mutex);
4048 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4049 				adev->gfx.kiq.ring.pipe,
4050 				adev->gfx.kiq.ring.queue, 0);
4051 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4052 		soc15_grbm_select(adev, 0, 0, 0, 0);
4053 		mutex_unlock(&adev->srbm_mutex);
4054 	}
4055 
4056 	gfx_v9_0_cp_enable(adev, false);
4057 
4058 	/* Skip suspend with A+A reset */
4059 	if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4060 		dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4061 		return 0;
4062 	}
4063 
4064 	adev->gfx.rlc.funcs->stop(adev);
4065 	return 0;
4066 }
4067 
4068 static int gfx_v9_0_suspend(void *handle)
4069 {
4070 	return gfx_v9_0_hw_fini(handle);
4071 }
4072 
4073 static int gfx_v9_0_resume(void *handle)
4074 {
4075 	return gfx_v9_0_hw_init(handle);
4076 }
4077 
4078 static bool gfx_v9_0_is_idle(void *handle)
4079 {
4080 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4081 
4082 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4083 				GRBM_STATUS, GUI_ACTIVE))
4084 		return false;
4085 	else
4086 		return true;
4087 }
4088 
4089 static int gfx_v9_0_wait_for_idle(void *handle)
4090 {
4091 	unsigned i;
4092 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4093 
4094 	for (i = 0; i < adev->usec_timeout; i++) {
4095 		if (gfx_v9_0_is_idle(handle))
4096 			return 0;
4097 		udelay(1);
4098 	}
4099 	return -ETIMEDOUT;
4100 }
4101 
4102 static int gfx_v9_0_soft_reset(void *handle)
4103 {
4104 	u32 grbm_soft_reset = 0;
4105 	u32 tmp;
4106 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4107 
4108 	/* GRBM_STATUS */
4109 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4110 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4111 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4112 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4113 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4114 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4115 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4116 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4117 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4118 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4119 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4120 	}
4121 
4122 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4123 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4124 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4125 	}
4126 
4127 	/* GRBM_STATUS2 */
4128 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4129 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4130 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4131 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4132 
4133 
4134 	if (grbm_soft_reset) {
4135 		/* stop the rlc */
4136 		adev->gfx.rlc.funcs->stop(adev);
4137 
4138 		if (adev->gfx.num_gfx_rings)
4139 			/* Disable GFX parsing/prefetching */
4140 			gfx_v9_0_cp_gfx_enable(adev, false);
4141 
4142 		/* Disable MEC parsing/prefetching */
4143 		gfx_v9_0_cp_compute_enable(adev, false);
4144 
4145 		if (grbm_soft_reset) {
4146 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4147 			tmp |= grbm_soft_reset;
4148 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4149 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4150 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4151 
4152 			udelay(50);
4153 
4154 			tmp &= ~grbm_soft_reset;
4155 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4156 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4157 		}
4158 
4159 		/* Wait a little for things to settle down */
4160 		udelay(50);
4161 	}
4162 	return 0;
4163 }
4164 
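/*
 * Read the 64-bit GPU clock counter through a KIQ COPY_DATA packet that
 * copies the register pair into a write-back slot; used when direct MMIO
 * access is not suitable (e.g. SR-IOV runtime).
 */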
4165 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4166 {
4167 	signed long r, cnt = 0;
4168 	unsigned long flags;
4169 	uint32_t seq, reg_val_offs = 0;
4170 	uint64_t value = 0;
4171 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4172 	struct amdgpu_ring *ring = &kiq->ring;
4173 
4174 	BUG_ON(!ring->funcs->emit_rreg);
4175 
4176 	spin_lock_irqsave(&kiq->ring_lock, flags);
4177 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4178 		pr_err("critical bug! too many kiq readers\n");
4179 		goto failed_unlock;
4180 	}
4181 	amdgpu_ring_alloc(ring, 32);
4182 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4183 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4184 				(5 << 8) |	/* dst: memory */
4185 				(1 << 16) |	/* count sel */
4186 				(1 << 20));	/* write confirm */
4187 	amdgpu_ring_write(ring, 0);
4188 	amdgpu_ring_write(ring, 0);
4189 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4190 				reg_val_offs * 4));
4191 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4192 				reg_val_offs * 4));
4193 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4194 	if (r)
4195 		goto failed_undo;
4196 
4197 	amdgpu_ring_commit(ring);
4198 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4199 
4200 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4201 
4202 	/* Don't keep waiting in the GPU reset case, because that may block the
4203 	 * gpu_recover() routine forever: e.g. this virt_kiq_rreg can be
4204 	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
4205 	 * return if we keep waiting in virt_kiq_rreg, which makes gpu_recover()
4206 	 * hang there.
4207 	 *
4208 	 * Also don't keep waiting when called from IRQ context.
4209 	 */
4210 	if (r < 1 && (amdgpu_in_reset(adev)))
4211 		goto failed_kiq_read;
4212 
4213 	might_sleep();
4214 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4215 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4216 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4217 	}
4218 
4219 	if (cnt > MAX_KIQ_REG_TRY)
4220 		goto failed_kiq_read;
4221 
4222 	mb();
4223 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4224 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4225 	amdgpu_device_wb_free(adev, reg_val_offs);
4226 	return value;
4227 
4228 failed_undo:
4229 	amdgpu_ring_undo(ring);
4230 failed_unlock:
4231 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4232 failed_kiq_read:
4233 	if (reg_val_offs)
4234 		amdgpu_device_wb_free(adev, reg_val_offs);
4235 	pr_err("failed to read gpu clock\n");
4236 	return ~0;
4237 }
4238 
4239 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4240 {
4241 	uint64_t clock;
4242 
4243 	amdgpu_gfx_off_ctrl(adev, false);
4244 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4245 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4246 		clock = gfx_v9_0_kiq_read_clock(adev);
4247 	} else {
4248 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4249 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4250 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4251 	}
4252 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4253 	amdgpu_gfx_off_ctrl(adev, true);
4254 	return clock;
4255 }
4256 
4257 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4258 					  uint32_t vmid,
4259 					  uint32_t gds_base, uint32_t gds_size,
4260 					  uint32_t gws_base, uint32_t gws_size,
4261 					  uint32_t oa_base, uint32_t oa_size)
4262 {
4263 	struct amdgpu_device *adev = ring->adev;
4264 
4265 	/* GDS Base */
4266 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4267 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4268 				   gds_base);
4269 
4270 	/* GDS Size */
4271 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4272 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4273 				   gds_size);
4274 
4275 	/* GWS */
4276 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4277 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4278 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4279 
4280 	/* OA */
4281 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4282 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4283 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4284 }
4285 
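/*
 * Hand-assembled GFX9 compute shaders that touch every VGPR/SGPR;
 * presumably used to initialize the GPR state for the EDC/ECC workarounds.
 */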
4286 static const u32 vgpr_init_compute_shader[] =
4287 {
4288 	0xb07c0000, 0xbe8000ff,
4289 	0x000000f8, 0xbf110800,
4290 	0x7e000280, 0x7e020280,
4291 	0x7e040280, 0x7e060280,
4292 	0x7e080280, 0x7e0a0280,
4293 	0x7e0c0280, 0x7e0e0280,
4294 	0x80808800, 0xbe803200,
4295 	0xbf84fff5, 0xbf9c0000,
4296 	0xd28c0001, 0x0001007f,
4297 	0xd28d0001, 0x0002027e,
4298 	0x10020288, 0xb8810904,
4299 	0xb7814000, 0xd1196a01,
4300 	0x00000301, 0xbe800087,
4301 	0xbefc00c1, 0xd89c4000,
4302 	0x00020201, 0xd89cc080,
4303 	0x00040401, 0x320202ff,
4304 	0x00000800, 0x80808100,
4305 	0xbf84fff8, 0x7e020280,
4306 	0xbf810000, 0x00000000,
4307 };
4308 
4309 static const u32 sgpr_init_compute_shader[] =
4310 {
4311 	0xb07c0000, 0xbe8000ff,
4312 	0x0000005f, 0xbee50080,
4313 	0xbe812c65, 0xbe822c65,
4314 	0xbe832c65, 0xbe842c65,
4315 	0xbe852c65, 0xb77c0005,
4316 	0x80808500, 0xbf84fff8,
4317 	0xbe800080, 0xbf810000,
4318 };
4319 
4320 static const u32 vgpr_init_compute_shader_arcturus[] = {
4321 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4322 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4323 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4324 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4325 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4326 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4327 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4328 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4329 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4330 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4331 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4332 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4333 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4334 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4335 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4336 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4337 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4338 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4339 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4340 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4341 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4342 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4343 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4344 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4345 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4346 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4347 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4348 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4349 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4350 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4351 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4352 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4353 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4354 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4355 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4356 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4357 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4358 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4359 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4360 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4361 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4362 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4363 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4364 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4365 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4366 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4367 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4368 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4369 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4370 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4371 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4372 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4373 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4374 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4375 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4376 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4377 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4378 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4379 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4380 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4381 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4382 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4383 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4384 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4385 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4386 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4387 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4388 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4389 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4390 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4391 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4392 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4393 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4394 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4395 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4396 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4397 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4398 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4399 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4400 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4401 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4402 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4403 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4404 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4405 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4406 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4407 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4408 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4409 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4410 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4411 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4412 	0xbf84fff8, 0xbf810000,
4413 };
4414 
4415 /* When the register arrays below are changed, please also update gpr_reg_size
4416   and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so that
4417   all gfx9 ASICs remain covered. */
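/*
 * The tables below hold the COMPUTE_* register state written with SET_SH_REG
 * packets before each workaround dispatch: workgroup dimensions, PGM_RSRC1/2
 * (GPR/LDS allocation) and the STATIC_THREAD_MGMT_SE* masks selecting which
 * CUs on each shader engine may run the waves.  (Interpretation of the
 * values; not taken from hardware documentation.)
 */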
4418 static const struct soc15_reg_entry vgpr_init_regs[] = {
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4431    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4432    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4433 };
4434 
4435 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4450 };
4451 
4452 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4467 };
4468 
4469 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4484 };
4485 
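/*
 * EDC/ECC error counter registers cleared and read back by the RAS code.
 * Per entry, the values after the register are assumed to be an unused reset
 * value followed by the number of shader engines and the number of instances
 * to iterate when the counters are walked.
 */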
4486 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4487    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4488    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4489    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4490    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4499    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4500    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4501    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4502    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4503    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4504    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4508    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4510    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4511    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4512    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4513    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4514    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4515    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4516    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4517    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4518    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4519    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4520 };
4521 
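/*
 * gfx_v9_0_do_edc_gds_workarounds() - prime the GDS EDC state.
 *
 * Uses a CP DMA_DATA packet on the first compute ring to write the whole
 * VMID0 GDS partition once, then busy-waits until the ring's read pointer
 * catches up with the write pointer.  Only runs when GFX RAS is enabled.
 */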
4522 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4523 {
4524 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4525 	int i, r;
4526 
4527 	/* only support when RAS is enabled */
4528 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4529 		return 0;
4530 
4531 	r = amdgpu_ring_alloc(ring, 7);
4532 	if (r) {
4533 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4534 			ring->name, r);
4535 		return r;
4536 	}
4537 
4538 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4539 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4540 
4541 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4542 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4543 				PACKET3_DMA_DATA_DST_SEL(1) |
4544 				PACKET3_DMA_DATA_SRC_SEL(2) |
4545 				PACKET3_DMA_DATA_ENGINE(0)));
4546 	amdgpu_ring_write(ring, 0);
4547 	amdgpu_ring_write(ring, 0);
4548 	amdgpu_ring_write(ring, 0);
4549 	amdgpu_ring_write(ring, 0);
4550 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4551 				adev->gds.gds_size);
4552 
4553 	amdgpu_ring_commit(ring);
4554 
4555 	for (i = 0; i < adev->usec_timeout; i++) {
4556 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4557 			break;
4558 		udelay(1);
4559 	}
4560 
4561 	if (i >= adev->usec_timeout)
4562 		r = -ETIMEDOUT;
4563 
4564 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4565 
4566 	return r;
4567 }
4568 
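/*
 * gfx_v9_0_do_edc_gpr_workarounds() - prime the VGPR/SGPR EDC state.
 *
 * Builds one indirect buffer that dispatches the VGPR init shader once and
 * the SGPR init shader twice (with two different CU masks), sized to cover
 * every CU on every shader engine, then submits it on the first compute ring
 * and waits for the fence.
 */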
4569 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4570 {
4571 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4572 	struct amdgpu_ib ib;
4573 	struct dma_fence *f = NULL;
4574 	int r, i;
4575 	unsigned total_size, vgpr_offset, sgpr_offset;
4576 	u64 gpu_addr;
4577 
4578 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4579 						adev->gfx.config.max_cu_per_sh *
4580 						adev->gfx.config.max_sh_per_se;
4581 	int sgpr_work_group_size = 5;
4582 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4583 	int vgpr_init_shader_size;
4584 	const u32 *vgpr_init_shader_ptr;
4585 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4586 
4587 	/* only support when RAS is enabled */
4588 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4589 		return 0;
4590 
4591 	/* bail if the compute ring is not ready */
4592 	if (!ring->sched.ready)
4593 		return 0;
4594 
4595 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4596 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4597 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4598 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4599 	} else {
4600 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4601 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4602 		vgpr_init_regs_ptr = vgpr_init_regs;
4603 	}
4604 
4605 	total_size =
4606 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4607 	total_size +=
4608 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4609 	total_size +=
4610 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4611 	total_size = ALIGN(total_size, 256);
4612 	vgpr_offset = total_size;
4613 	total_size += ALIGN(vgpr_init_shader_size, 256);
4614 	sgpr_offset = total_size;
4615 	total_size += sizeof(sgpr_init_compute_shader);
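	/*
	 * Resulting buffer layout (a sketch of the offsets computed above):
	 *
	 *   0 .. vgpr_offset-1   packets (three dispatches worth of SET_SH_REG,
	 *                        DISPATCH_DIRECT and EVENT_WRITE)
	 *   vgpr_offset ..       VGPR init shader binary (256-byte aligned)
	 *   sgpr_offset ..       SGPR init shader binary
	 */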
4616 
4617 	/* allocate an indirect buffer to put the commands in */
4618 	memset(&ib, 0, sizeof(ib));
4619 	r = amdgpu_ib_get(adev, NULL, total_size,
4620 					AMDGPU_IB_POOL_DIRECT, &ib);
4621 	if (r) {
4622 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4623 		return r;
4624 	}
4625 
4626 	/* load the compute shaders */
4627 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4628 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4629 
4630 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4631 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4632 
4633 	/* init the ib length to 0 */
4634 	ib.length_dw = 0;
4635 
4636 	/* VGPR */
4637 	/* write the register state for the compute dispatch */
4638 	for (i = 0; i < gpr_reg_size; i++) {
4639 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4640 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4641 								- PACKET3_SET_SH_REG_START;
4642 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4643 	}
4644 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4645 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4646 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4647 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4648 							- PACKET3_SET_SH_REG_START;
4649 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4650 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4651 
4652 	/* write dispatch packet */
4653 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4654 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4655 	ib.ptr[ib.length_dw++] = 1; /* y */
4656 	ib.ptr[ib.length_dw++] = 1; /* z */
4657 	ib.ptr[ib.length_dw++] =
4658 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4659 
4660 	/* write CS partial flush packet */
4661 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4662 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4663 
4664 	/* SGPR1 */
4665 	/* write the register state for the compute dispatch */
4666 	for (i = 0; i < gpr_reg_size; i++) {
4667 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4668 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4669 								- PACKET3_SET_SH_REG_START;
4670 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4671 	}
4672 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4673 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4674 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4675 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4676 							- PACKET3_SET_SH_REG_START;
4677 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4678 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4679 
4680 	/* write dispatch packet */
4681 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4682 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4683 	ib.ptr[ib.length_dw++] = 1; /* y */
4684 	ib.ptr[ib.length_dw++] = 1; /* z */
4685 	ib.ptr[ib.length_dw++] =
4686 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4687 
4688 	/* write CS partial flush packet */
4689 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4690 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4691 
4692 	/* SGPR2 */
4693 	/* write the register state for the compute dispatch */
4694 	for (i = 0; i < gpr_reg_size; i++) {
4695 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4696 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4697 								- PACKET3_SET_SH_REG_START;
4698 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4699 	}
4700 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4701 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4702 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4703 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4704 							- PACKET3_SET_SH_REG_START;
4705 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4706 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4707 
4708 	/* write dispatch packet */
4709 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4710 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4711 	ib.ptr[ib.length_dw++] = 1; /* y */
4712 	ib.ptr[ib.length_dw++] = 1; /* z */
4713 	ib.ptr[ib.length_dw++] =
4714 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4715 
4716 	/* write CS partial flush packet */
4717 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4718 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4719 
4720 	/* schedule the IB on the ring */
4721 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4722 	if (r) {
4723 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4724 		goto fail;
4725 	}
4726 
4727 	/* wait for the GPU to finish processing the IB */
4728 	r = dma_fence_wait(f, false);
4729 	if (r) {
4730 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4731 		goto fail;
4732 	}
4733 
4734 fail:
4735 	amdgpu_ib_free(adev, &ib, NULL);
4736 	dma_fence_put(f);
4737 
4738 	return r;
4739 }
4740 
4741 static int gfx_v9_0_early_init(void *handle)
4742 {
4743 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4744 
4745 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4746 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4747 		adev->gfx.num_gfx_rings = 0;
4748 	else
4749 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4750 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4751 					  AMDGPU_MAX_COMPUTE_RINGS);
4752 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4753 	gfx_v9_0_set_ring_funcs(adev);
4754 	gfx_v9_0_set_irq_funcs(adev);
4755 	gfx_v9_0_set_gds_init(adev);
4756 	gfx_v9_0_set_rlc_funcs(adev);
4757 
4758 	return 0;
4759 }
4760 
4761 static int gfx_v9_0_ecc_late_init(void *handle)
4762 {
4763 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4764 	int r;
4765 
4766 	/*
4767 	 * Temporary workaround: on several cards the CP firmware fails to
4768 	 * update the read pointer while CPDMA writes the GDS clearing
4769 	 * operation during the suspend/resume sequence, so limit this
4770 	 * operation to the cold boot sequence.
4771 	 */
4772 	if ((!adev->in_suspend) &&
4773 	    (adev->gds.gds_size)) {
4774 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4775 		if (r)
4776 			return r;
4777 	}
4778 
4779 	/* requires IBs so do in late init after IB pool is initialized */
4780 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4781 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4782 	else
4783 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4784 
4785 	if (r)
4786 		return r;
4787 
4788 	if (adev->gfx.ras_funcs &&
4789 	    adev->gfx.ras_funcs->ras_late_init) {
4790 		r = adev->gfx.ras_funcs->ras_late_init(adev);
4791 		if (r)
4792 			return r;
4793 	}
4794 
4795 	if (adev->gfx.ras_funcs &&
4796 	    adev->gfx.ras_funcs->enable_watchdog_timer)
4797 		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4798 
4799 	return 0;
4800 }
4801 
4802 static int gfx_v9_0_late_init(void *handle)
4803 {
4804 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805 	int r;
4806 
4807 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4808 	if (r)
4809 		return r;
4810 
4811 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4812 	if (r)
4813 		return r;
4814 
4815 	r = gfx_v9_0_ecc_late_init(handle);
4816 	if (r)
4817 		return r;
4818 
4819 	return 0;
4820 }
4821 
4822 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4823 {
4824 	uint32_t rlc_setting;
4825 
4826 	/* if RLC is not enabled, do nothing */
4827 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4828 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4829 		return false;
4830 
4831 	return true;
4832 }
4833 
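/*
 * RLC safe-mode handshake: write CMD plus a MESSAGE of 1 to RLC_SAFE_MODE to
 * ask the RLC to pause, then poll until the RLC clears the CMD bit as an
 * acknowledgement.  Leaving safe mode (below) writes CMD with MESSAGE 0 and
 * does not wait.
 */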
4834 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4835 {
4836 	uint32_t data;
4837 	unsigned i;
4838 
4839 	data = RLC_SAFE_MODE__CMD_MASK;
4840 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4841 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4842 
4843 	/* wait for RLC_SAFE_MODE */
4844 	for (i = 0; i < adev->usec_timeout; i++) {
4845 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4846 			break;
4847 		udelay(1);
4848 	}
4849 }
4850 
4851 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4852 {
4853 	uint32_t data;
4854 
4855 	data = RLC_SAFE_MODE__CMD_MASK;
4856 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4857 }
4858 
4859 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4860 						bool enable)
4861 {
4862 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4863 
4864 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4865 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4866 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4867 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4868 	} else {
4869 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4870 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4871 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4872 	}
4873 
4874 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4875 }
4876 
4877 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4878 						bool enable)
4879 {
4880 	/* TODO: double check if we need to perform under safe mode */
4881 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4882 
4883 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4884 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4885 	else
4886 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4887 
4888 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4889 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4890 	else
4891 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4892 
4893 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4894 }
4895 
4896 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4897 						      bool enable)
4898 {
4899 	uint32_t data, def;
4900 
4901 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4902 
4903 	/* It is disabled by HW by default */
4904 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4905 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4906 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4907 
4908 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4909 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4910 
4911 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4912 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4913 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4914 
4915 		/* only for Vega10 & Raven1 */
4916 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4917 
4918 		if (def != data)
4919 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4920 
4921 		/* MGLS is a global flag to control all MGLS in GFX */
4922 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4923 			/* 2 - RLC memory Light sleep */
4924 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4925 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4926 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4927 				if (def != data)
4928 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4929 			}
4930 			/* 3 - CP memory Light sleep */
4931 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4932 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4933 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4934 				if (def != data)
4935 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4936 			}
4937 		}
4938 	} else {
4939 		/* 1 - MGCG_OVERRIDE */
4940 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4941 
4942 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4943 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4944 
4945 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4946 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4947 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4948 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4949 
4950 		if (def != data)
4951 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4952 
4953 		/* 2 - disable MGLS in RLC */
4954 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4955 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4956 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4957 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4958 		}
4959 
4960 		/* 3 - disable MGLS in CP */
4961 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4962 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4963 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4964 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4965 		}
4966 	}
4967 
4968 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4969 }
4970 
4971 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4972 					   bool enable)
4973 {
4974 	uint32_t data, def;
4975 
4976 	if (!adev->gfx.num_gfx_rings)
4977 		return;
4978 
4979 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4980 
4981 	/* Enable 3D CGCG/CGLS */
4982 	if (enable) {
4983 		/* write cmd to clear cgcg/cgls ov */
4984 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4985 		/* unset CGCG override */
4986 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4987 		/* update CGCG and CGLS override bits */
4988 		if (def != data)
4989 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4990 
4991 		/* enable 3Dcgcg FSM(0x0000363f) */
4992 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4993 
4994 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4995 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4996 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4997 		else
4998 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4999 
5000 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5001 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5002 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5003 		if (def != data)
5004 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5005 
5006 		/* set IDLE_POLL_COUNT(0x00900100) */
5007 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5008 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5009 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5010 		if (def != data)
5011 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5012 	} else {
5013 		/* Disable CGCG/CGLS */
5014 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5015 		/* disable cgcg; cgls should be disabled as well */
5016 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5017 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5018 		/* disable cgcg and cgls in FSM */
5019 		if (def != data)
5020 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5021 	}
5022 
5023 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5024 }
5025 
5026 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5027 						      bool enable)
5028 {
5029 	uint32_t def, data;
5030 
5031 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5032 
5033 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5034 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5035 		/* unset CGCG override */
5036 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5037 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5038 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5039 		else
5040 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5041 		/* update CGCG and CGLS override bits */
5042 		if (def != data)
5043 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5044 
5045 		/* enable cgcg FSM(0x0000363F) */
5046 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5047 
5048 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5049 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5050 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5051 		else
5052 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5053 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5054 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5055 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5056 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5057 		if (def != data)
5058 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5059 
5060 		/* set IDLE_POLL_COUNT(0x00900100) */
5061 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5062 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5063 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5064 		if (def != data)
5065 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5066 	} else {
5067 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5068 		/* reset CGCG/CGLS bits */
5069 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5070 		/* disable cgcg and cgls in FSM */
5071 		if (def != data)
5072 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5073 	}
5074 
5075 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5076 }
5077 
5078 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5079 					    bool enable)
5080 {
5081 	if (enable) {
5082 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5083 		 * ===  MGCG + MGLS ===
5084 		 */
5085 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5086 		/* ===  CGCG /CGLS for GFX 3D Only === */
5087 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5088 		/* ===  CGCG + CGLS === */
5089 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5090 	} else {
5091 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5092 		 * ===  CGCG + CGLS ===
5093 		 */
5094 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5095 		/* ===  CGCG /CGLS for GFX 3D Only === */
5096 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5097 		/* ===  MGCG + MGLS === */
5098 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5099 	}
5100 	return 0;
5101 }
5102 
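/*
 * The GFXOFF disable/enable bracket below keeps the GC register block
 * powered while RLC_SPM_MC_CNTL is read-modify-written; presumably the
 * register is not accessible while the core is in GFXOFF.
 */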
5103 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5104 {
5105 	u32 reg, data;
5106 
5107 	amdgpu_gfx_off_ctrl(adev, false);
5108 
5109 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5110 	if (amdgpu_sriov_is_pp_one_vf(adev))
5111 		data = RREG32_NO_KIQ(reg);
5112 	else
5113 		data = RREG32(reg);
5114 
5115 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5116 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5117 
5118 	if (amdgpu_sriov_is_pp_one_vf(adev))
5119 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5120 	else
5121 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5122 
5123 	amdgpu_gfx_off_ctrl(adev, true);
5124 }
5125 
5126 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5127 					uint32_t offset,
5128 					struct soc15_reg_rlcg *entries, int arr_size)
5129 {
5130 	int i;
5131 	uint32_t reg;
5132 
5133 	if (!entries)
5134 		return false;
5135 
5136 	for (i = 0; i < arr_size; i++) {
5137 		const struct soc15_reg_rlcg *entry;
5138 
5139 		entry = &entries[i];
5140 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5141 		if (offset == reg)
5142 			return true;
5143 	}
5144 
5145 	return false;
5146 }
5147 
5148 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5149 {
5150 	return gfx_v9_0_check_rlcg_range(adev, offset,
5151 					(void *)rlcg_access_gc_9_0,
5152 					ARRAY_SIZE(rlcg_access_gc_9_0));
5153 }
5154 
5155 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5156 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5157 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5158 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5159 	.init = gfx_v9_0_rlc_init,
5160 	.get_csb_size = gfx_v9_0_get_csb_size,
5161 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5162 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5163 	.resume = gfx_v9_0_rlc_resume,
5164 	.stop = gfx_v9_0_rlc_stop,
5165 	.reset = gfx_v9_0_rlc_reset,
5166 	.start = gfx_v9_0_rlc_start,
5167 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5168 	.sriov_wreg = gfx_v9_0_sriov_wreg,
5169 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5170 };
5171 
5172 static int gfx_v9_0_set_powergating_state(void *handle,
5173 					  enum amd_powergating_state state)
5174 {
5175 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5176 	bool enable = (state == AMD_PG_STATE_GATE);
5177 
5178 	switch (adev->ip_versions[GC_HWIP][0]) {
5179 	case IP_VERSION(9, 2, 2):
5180 	case IP_VERSION(9, 1, 0):
5181 	case IP_VERSION(9, 3, 0):
5182 		if (!enable)
5183 			amdgpu_gfx_off_ctrl(adev, false);
5184 
5185 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5186 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5187 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5188 		} else {
5189 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5190 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5191 		}
5192 
5193 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5194 			gfx_v9_0_enable_cp_power_gating(adev, true);
5195 		else
5196 			gfx_v9_0_enable_cp_power_gating(adev, false);
5197 
5198 		/* update gfx cgpg state */
5199 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5200 
5201 		/* update mgcg state */
5202 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5203 
5204 		if (enable)
5205 			amdgpu_gfx_off_ctrl(adev, true);
5206 		break;
5207 	case IP_VERSION(9, 2, 1):
5208 		amdgpu_gfx_off_ctrl(adev, enable);
5209 		break;
5210 	default:
5211 		break;
5212 	}
5213 
5214 	return 0;
5215 }
5216 
5217 static int gfx_v9_0_set_clockgating_state(void *handle,
5218 					  enum amd_clockgating_state state)
5219 {
5220 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5221 
5222 	if (amdgpu_sriov_vf(adev))
5223 		return 0;
5224 
5225 	switch (adev->ip_versions[GC_HWIP][0]) {
5226 	case IP_VERSION(9, 0, 1):
5227 	case IP_VERSION(9, 2, 1):
5228 	case IP_VERSION(9, 4, 0):
5229 	case IP_VERSION(9, 2, 2):
5230 	case IP_VERSION(9, 1, 0):
5231 	case IP_VERSION(9, 4, 1):
5232 	case IP_VERSION(9, 3, 0):
5233 	case IP_VERSION(9, 4, 2):
5234 		gfx_v9_0_update_gfx_clock_gating(adev,
5235 						 state == AMD_CG_STATE_GATE);
5236 		break;
5237 	default:
5238 		break;
5239 	}
5240 	return 0;
5241 }
5242 
5243 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5244 {
5245 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5246 	int data;
5247 
5248 	if (amdgpu_sriov_vf(adev))
5249 		*flags = 0;
5250 
5251 	/* AMD_CG_SUPPORT_GFX_MGCG */
5252 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5253 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5254 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5255 
5256 	/* AMD_CG_SUPPORT_GFX_CGCG */
5257 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5258 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5259 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5260 
5261 	/* AMD_CG_SUPPORT_GFX_CGLS */
5262 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5263 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5264 
5265 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5266 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5267 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5268 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5269 
5270 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5271 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5272 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5273 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5274 
5275 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5276 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5277 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5278 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5279 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5280 
5281 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5282 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5283 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5284 	}
5285 }
5286 
5287 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5288 {
5289 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 uses a 32-bit rptr */
5290 }
5291 
5292 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5293 {
5294 	struct amdgpu_device *adev = ring->adev;
5295 	u64 wptr;
5296 
5297 	/* XXX check if swapping is necessary on BE */
5298 	if (ring->use_doorbell) {
5299 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5300 	} else {
5301 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5302 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5303 	}
5304 
5305 	return wptr;
5306 }
5307 
5308 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5309 {
5310 	struct amdgpu_device *adev = ring->adev;
5311 
5312 	if (ring->use_doorbell) {
5313 		/* XXX check if swapping is necessary on BE */
5314 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5315 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5316 	} else {
5317 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5318 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5319 	}
5320 }
5321 
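/*
 * Flush HDP by writing the NBIO "flush request" register and waiting on the
 * matching "flush done" bit with a WAIT_REG_MEM, using a per-pipe mask for
 * compute rings and the CP0 mask (via the PFP engine) for the gfx ring.
 */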
5322 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5323 {
5324 	struct amdgpu_device *adev = ring->adev;
5325 	u32 ref_and_mask, reg_mem_engine;
5326 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5327 
5328 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5329 		switch (ring->me) {
5330 		case 1:
5331 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5332 			break;
5333 		case 2:
5334 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5335 			break;
5336 		default:
5337 			return;
5338 		}
5339 		reg_mem_engine = 0;
5340 	} else {
5341 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5342 		reg_mem_engine = 1; /* pfp */
5343 	}
5344 
5345 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5346 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5347 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5348 			      ref_and_mask, ref_and_mask, 0x20);
5349 }
5350 
5351 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5352 					struct amdgpu_job *job,
5353 					struct amdgpu_ib *ib,
5354 					uint32_t flags)
5355 {
5356 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5357 	u32 header, control = 0;
5358 
5359 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5360 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5361 	else
5362 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5363 
5364 	control |= ib->length_dw | (vmid << 24);
5365 
5366 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5367 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5368 
5369 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5370 			gfx_v9_0_ring_emit_de_meta(ring);
5371 	}
5372 
5373 	amdgpu_ring_write(ring, header);
5374 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5375 	amdgpu_ring_write(ring,
5376 #ifdef __BIG_ENDIAN
5377 		(2 << 0) |
5378 #endif
5379 		lower_32_bits(ib->gpu_addr));
5380 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5381 	amdgpu_ring_write(ring, control);
5382 }
5383 
5384 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5385 					  struct amdgpu_job *job,
5386 					  struct amdgpu_ib *ib,
5387 					  uint32_t flags)
5388 {
5389 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5390 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5391 
5392 	/* Currently, there is a high probability of a wave ID mismatch
5393 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5394 	 * different wave IDs than the GDS expects. This situation happens
5395 	 * randomly when at least 5 compute pipes use GDS ordered append.
5396 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5397 	 * Those are probably bugs somewhere else in the kernel driver.
5398 	 *
5399 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5400 	 * GDS to 0 for this ring (me/pipe).
5401 	 */
5402 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5403 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5404 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5405 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5406 	}
5407 
5408 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5409 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5410 	amdgpu_ring_write(ring,
5411 #ifdef __BIG_ENDIAN
5412 				(2 << 0) |
5413 #endif
5414 				lower_32_bits(ib->gpu_addr));
5415 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5416 	amdgpu_ring_write(ring, control);
5417 }
5418 
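/*
 * Fence emission uses a RELEASE_MEM packet: flush/write back the selected
 * caches on a CACHE_FLUSH_AND_INV_TS event, write the 32- or 64-bit sequence
 * number to "addr", and optionally raise an interrupt.
 */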
5419 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5420 				     u64 seq, unsigned flags)
5421 {
5422 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5423 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5424 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5425 
5426 	/* RELEASE_MEM - flush caches, send int */
5427 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5428 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5429 					       EOP_TC_NC_ACTION_EN) :
5430 					      (EOP_TCL1_ACTION_EN |
5431 					       EOP_TC_ACTION_EN |
5432 					       EOP_TC_WB_ACTION_EN |
5433 					       EOP_TC_MD_ACTION_EN)) |
5434 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5435 				 EVENT_INDEX(5)));
5436 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5437 
5438 	/*
5439 	 * The address must be Qword aligned for a 64-bit write, or Dword
5440 	 * aligned when only the low 32 bits of data are sent (data high is discarded).
5441 	 */
5442 	if (write64bit)
5443 		BUG_ON(addr & 0x7);
5444 	else
5445 		BUG_ON(addr & 0x3);
5446 	amdgpu_ring_write(ring, lower_32_bits(addr));
5447 	amdgpu_ring_write(ring, upper_32_bits(addr));
5448 	amdgpu_ring_write(ring, lower_32_bits(seq));
5449 	amdgpu_ring_write(ring, upper_32_bits(seq));
5450 	amdgpu_ring_write(ring, 0);
5451 }
5452 
5453 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5454 {
5455 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5456 	uint32_t seq = ring->fence_drv.sync_seq;
5457 	uint64_t addr = ring->fence_drv.gpu_addr;
5458 
5459 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5460 			      lower_32_bits(addr), upper_32_bits(addr),
5461 			      seq, 0xffffffff, 4);
5462 }
5463 
5464 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5465 					unsigned vmid, uint64_t pd_addr)
5466 {
5467 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5468 
5469 	/* compute doesn't have PFP */
5470 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5471 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5472 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5473 		amdgpu_ring_write(ring, 0x0);
5474 	}
5475 }
5476 
5477 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5478 {
5479 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware uses a 32-bit rptr */
5480 }
5481 
5482 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5483 {
5484 	u64 wptr;
5485 
5486 	/* XXX check if swapping is necessary on BE */
5487 	if (ring->use_doorbell)
5488 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5489 	else
5490 		BUG();
5491 	return wptr;
5492 }
5493 
5494 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5495 {
5496 	struct amdgpu_device *adev = ring->adev;
5497 
5498 	/* XXX check if swapping is necessary on BE */
5499 	if (ring->use_doorbell) {
5500 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5501 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5502 	} else {
5503 		BUG(); /* only DOORBELL method supported on gfx9 now */
5504 	}
5505 }
5506 
5507 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5508 					 u64 seq, unsigned int flags)
5509 {
5510 	struct amdgpu_device *adev = ring->adev;
5511 
5512 	/* we only allocate 32 bits for each fence seq writeback address */
5513 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5514 
5515 	/* write fence seq to the "addr" */
5516 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5517 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5518 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5519 	amdgpu_ring_write(ring, lower_32_bits(addr));
5520 	amdgpu_ring_write(ring, upper_32_bits(addr));
5521 	amdgpu_ring_write(ring, lower_32_bits(seq));
5522 
5523 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5524 		/* set register to trigger INT */
5525 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5526 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5527 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5528 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5529 		amdgpu_ring_write(ring, 0);
5530 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5531 	}
5532 }
5533 
5534 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5535 {
5536 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5537 	amdgpu_ring_write(ring, 0);
5538 }
5539 
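/*
 * CE/DE metadata: under SR-IOV a zeroed v9_ce_ib_state / v9_de_ib_state is
 * written into the CSA ahead of preemptible IBs; the DE payload also records
 * where GDS contents are backed up (gds_addr = csa_addr + 4096).  Presumably
 * this gives the firmware somewhere to save state across world switches.
 */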
5540 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5541 {
5542 	struct v9_ce_ib_state ce_payload = {0};
5543 	uint64_t csa_addr;
5544 	int cnt;
5545 
5546 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5547 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5548 
5549 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5550 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5551 				 WRITE_DATA_DST_SEL(8) |
5552 				 WR_CONFIRM) |
5553 				 WRITE_DATA_CACHE_POLICY(0));
5554 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5555 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5556 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5557 }
5558 
5559 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5560 {
5561 	struct v9_de_ib_state de_payload = {0};
5562 	uint64_t csa_addr, gds_addr;
5563 	int cnt;
5564 
5565 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5566 	gds_addr = csa_addr + 4096;
5567 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5568 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5569 
5570 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5571 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5572 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5573 				 WRITE_DATA_DST_SEL(8) |
5574 				 WR_CONFIRM) |
5575 				 WRITE_DATA_CACHE_POLICY(0));
5576 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5577 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5578 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5579 }
5580 
5581 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5582 				   bool secure)
5583 {
5584 	uint32_t v = secure ? FRAME_TMZ : 0;
5585 
5586 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5587 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5588 }
5589 
5590 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5591 {
5592 	uint32_t dw2 = 0;
5593 
5594 	if (amdgpu_sriov_vf(ring->adev))
5595 		gfx_v9_0_ring_emit_ce_meta(ring);
5596 
5597 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5598 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5599 		/* set load_global_config & load_global_uconfig */
5600 		dw2 |= 0x8001;
5601 		/* set load_cs_sh_regs */
5602 		dw2 |= 0x01000000;
5603 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5604 		dw2 |= 0x10002;
5605 
5606 		/* set load_ce_ram if preamble presented */
5607 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5608 			dw2 |= 0x10000000;
5609 	} else {
5610 		/* still load_ce_ram if this is the first time the preamble is
5611 		 * presented, even though no context switch happens.
5612 		 */
5613 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5614 			dw2 |= 0x10000000;
5615 	}
5616 
5617 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5618 	amdgpu_ring_write(ring, dw2);
5619 	amdgpu_ring_write(ring, 0);
5620 }
5621 
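/*
 * Conditional execution: emit_init_cond_exec() reserves one dword after the
 * COND_EXEC packet and returns its ring offset; emit_patch_cond_exec() later
 * patches that dword with the number of dwords to skip when the value at
 * cond_exe_gpu_addr is zero (wrapping around the ring buffer if needed).
 */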
5622 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5623 {
5624 	unsigned ret;
5625 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5626 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5627 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5628 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5629 	ret = ring->wptr & ring->buf_mask;
5630 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5631 	return ret;
5632 }
5633 
5634 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5635 {
5636 	unsigned cur;
5637 	BUG_ON(offset > ring->buf_mask);
5638 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5639 
5640 	cur = (ring->wptr & ring->buf_mask) - 1;
5641 	if (likely(cur > offset))
5642 		ring->ring[offset] = cur - offset;
5643 	else
5644 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5645 }
5646 
5647 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5648 				    uint32_t reg_val_offs)
5649 {
5650 	struct amdgpu_device *adev = ring->adev;
5651 
5652 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5653 	amdgpu_ring_write(ring, 0 |	/* src: register */
5654 				(5 << 8) |	/* dst: memory */
5655 				(1 << 20));	/* write confirm */
5656 	amdgpu_ring_write(ring, reg);
5657 	amdgpu_ring_write(ring, 0);
5658 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5659 				reg_val_offs * 4));
5660 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5661 				reg_val_offs * 4));
5662 }
5663 
5664 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5665 				    uint32_t val)
5666 {
5667 	uint32_t cmd = 0;
5668 
5669 	switch (ring->funcs->type) {
5670 	case AMDGPU_RING_TYPE_GFX:
5671 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5672 		break;
5673 	case AMDGPU_RING_TYPE_KIQ:
5674 		cmd = (1 << 16); /* no inc addr */
5675 		break;
5676 	default:
5677 		cmd = WR_CONFIRM;
5678 		break;
5679 	}
5680 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5681 	amdgpu_ring_write(ring, cmd);
5682 	amdgpu_ring_write(ring, reg);
5683 	amdgpu_ring_write(ring, 0);
5684 	amdgpu_ring_write(ring, val);
5685 }
5686 
5687 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5688 					uint32_t val, uint32_t mask)
5689 {
5690 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5691 }
5692 
5693 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5694 						  uint32_t reg0, uint32_t reg1,
5695 						  uint32_t ref, uint32_t mask)
5696 {
5697 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5698 	struct amdgpu_device *adev = ring->adev;
5699 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5700 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5701 
5702 	if (fw_version_ok)
5703 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5704 				      ref, mask, 0x20);
5705 	else
5706 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5707 							   ref, mask);
5708 }
5709 
5710 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5711 {
5712 	struct amdgpu_device *adev = ring->adev;
5713 	uint32_t value = 0;
5714 
5715 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5716 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5717 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5718 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5719 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5720 }
5721 
5722 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5723 						 enum amdgpu_interrupt_state state)
5724 {
5725 	switch (state) {
5726 	case AMDGPU_IRQ_STATE_DISABLE:
5727 	case AMDGPU_IRQ_STATE_ENABLE:
5728 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5729 			       TIME_STAMP_INT_ENABLE,
5730 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5731 		break;
5732 	default:
5733 		break;
5734 	}
5735 }
5736 
5737 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5738 						     int me, int pipe,
5739 						     enum amdgpu_interrupt_state state)
5740 {
5741 	u32 mec_int_cntl, mec_int_cntl_reg;
5742 
5743 	/*
5744 	 * amdgpu controls only the first MEC. That's why this function only
5745 	 * handles the setting of interrupts for this specific MEC. All other
5746 	 * pipes' interrupts are set by amdkfd.
5747 	 */
5748 
5749 	if (me == 1) {
5750 		switch (pipe) {
5751 		case 0:
5752 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5753 			break;
5754 		case 1:
5755 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5756 			break;
5757 		case 2:
5758 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5759 			break;
5760 		case 3:
5761 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5762 			break;
5763 		default:
5764 			DRM_DEBUG("invalid pipe %d\n", pipe);
5765 			return;
5766 		}
5767 	} else {
5768 		DRM_DEBUG("invalid me %d\n", me);
5769 		return;
5770 	}
5771 
5772 	switch (state) {
5773 	case AMDGPU_IRQ_STATE_DISABLE:
5774 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5775 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5776 					     TIME_STAMP_INT_ENABLE, 0);
5777 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5778 		break;
5779 	case AMDGPU_IRQ_STATE_ENABLE:
5780 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5781 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5782 					     TIME_STAMP_INT_ENABLE, 1);
5783 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5784 		break;
5785 	default:
5786 		break;
5787 	}
5788 }
5789 
5790 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5791 					     struct amdgpu_irq_src *source,
5792 					     unsigned type,
5793 					     enum amdgpu_interrupt_state state)
5794 {
5795 	switch (state) {
5796 	case AMDGPU_IRQ_STATE_DISABLE:
5797 	case AMDGPU_IRQ_STATE_ENABLE:
5798 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5799 			       PRIV_REG_INT_ENABLE,
5800 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5801 		break;
5802 	default:
5803 		break;
5804 	}
5805 
5806 	return 0;
5807 }
5808 
5809 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5810 					      struct amdgpu_irq_src *source,
5811 					      unsigned type,
5812 					      enum amdgpu_interrupt_state state)
5813 {
5814 	switch (state) {
5815 	case AMDGPU_IRQ_STATE_DISABLE:
5816 	case AMDGPU_IRQ_STATE_ENABLE:
5817 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5818 			       PRIV_INSTR_INT_ENABLE,
5819 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5820 		break;
5821 	default:
5822 		break;
5823 	}
5824 
5825 	return 0;
5826 }
5827 
5828 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5829 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5830 			CP_ECC_ERROR_INT_ENABLE, 1)
5831 
5832 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5833 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5834 			CP_ECC_ERROR_INT_ENABLE, 0)
5835 
5836 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5837 					      struct amdgpu_irq_src *source,
5838 					      unsigned type,
5839 					      enum amdgpu_interrupt_state state)
5840 {
5841 	switch (state) {
5842 	case AMDGPU_IRQ_STATE_DISABLE:
5843 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5844 				CP_ECC_ERROR_INT_ENABLE, 0);
5845 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5846 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5847 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5848 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5849 		break;
5850 
5851 	case AMDGPU_IRQ_STATE_ENABLE:
5852 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5853 				CP_ECC_ERROR_INT_ENABLE, 1);
5854 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5855 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5856 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5857 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5858 		break;
5859 	default:
5860 		break;
5861 	}
5862 
5863 	return 0;
5864 }
5865 
5866 
5867 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5868 					    struct amdgpu_irq_src *src,
5869 					    unsigned type,
5870 					    enum amdgpu_interrupt_state state)
5871 {
5872 	switch (type) {
5873 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5874 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5875 		break;
5876 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5877 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5878 		break;
5879 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5880 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5881 		break;
5882 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5883 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5884 		break;
5885 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5886 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5887 		break;
5888 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5889 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5890 		break;
5891 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5892 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5893 		break;
5894 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5895 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5896 		break;
5897 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5898 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5899 		break;
5900 	default:
5901 		break;
5902 	}
5903 	return 0;
5904 }
5905 
5906 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5907 			    struct amdgpu_irq_src *source,
5908 			    struct amdgpu_iv_entry *entry)
5909 {
5910 	int i;
5911 	u8 me_id, pipe_id, queue_id;
5912 	struct amdgpu_ring *ring;
5913 
5914 	DRM_DEBUG("IH: CP EOP\n");
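	/* Decode the IV ring_id; per the masks below: bits [1:0] = pipe,
	 * bits [3:2] = ME and bits [6:4] = queue.
	 */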
5915 	me_id = (entry->ring_id & 0x0c) >> 2;
5916 	pipe_id = (entry->ring_id & 0x03) >> 0;
5917 	queue_id = (entry->ring_id & 0x70) >> 4;
5918 
5919 	switch (me_id) {
5920 	case 0:
5921 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5922 		break;
5923 	case 1:
5924 	case 2:
5925 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5926 			ring = &adev->gfx.compute_ring[i];
5927 			/* Per-queue interrupt is supported for MEC starting from VI.
5928 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5929 			  */
5930 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5931 				amdgpu_fence_process(ring);
5932 		}
5933 		break;
5934 	}
5935 	return 0;
5936 }
5937 
5938 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5939 			   struct amdgpu_iv_entry *entry)
5940 {
5941 	u8 me_id, pipe_id, queue_id;
5942 	struct amdgpu_ring *ring;
5943 	int i;
5944 
5945 	me_id = (entry->ring_id & 0x0c) >> 2;
5946 	pipe_id = (entry->ring_id & 0x03) >> 0;
5947 	queue_id = (entry->ring_id & 0x70) >> 4;
5948 
5949 	switch (me_id) {
5950 	case 0:
5951 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5952 		break;
5953 	case 1:
5954 	case 2:
5955 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5956 			ring = &adev->gfx.compute_ring[i];
5957 			if (ring->me == me_id && ring->pipe == pipe_id &&
5958 			    ring->queue == queue_id)
5959 				drm_sched_fault(&ring->sched);
5960 		}
5961 		break;
5962 	}
5963 }
5964 
5965 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5966 				 struct amdgpu_irq_src *source,
5967 				 struct amdgpu_iv_entry *entry)
5968 {
5969 	DRM_ERROR("Illegal register access in command stream\n");
5970 	gfx_v9_0_fault(adev, entry);
5971 	return 0;
5972 }
5973 
5974 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5975 				  struct amdgpu_irq_src *source,
5976 				  struct amdgpu_iv_entry *entry)
5977 {
5978 	DRM_ERROR("Illegal instruction in command stream\n");
5979 	gfx_v9_0_fault(adev, entry);
5980 	return 0;
5981 }
5982 
5983 
5984 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5985 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5986 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5987 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5988 	},
5989 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5990 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5991 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5992 	},
5993 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5994 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5995 	  0, 0
5996 	},
5997 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5998 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5999 	  0, 0
6000 	},
6001 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6002 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6003 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6004 	},
6005 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6006 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6007 	  0, 0
6008 	},
6009 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6010 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6011 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6012 	},
6013 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6014 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6015 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6016 	},
6017 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6018 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6019 	  0, 0
6020 	},
6021 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6022 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6023 	  0, 0
6024 	},
6025 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6026 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6027 	  0, 0
6028 	},
6029 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6030 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6031 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6032 	},
6033 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6034 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6035 	  0, 0
6036 	},
6037 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6038 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6039 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6040 	},
6041 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6042 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6043 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6044 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6045 	},
6046 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6047 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6048 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6049 	  0, 0
6050 	},
6051 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6052 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6053 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6054 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6055 	},
6056 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6057 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6058 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6059 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6060 	},
6061 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6062 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6063 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6064 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6065 	},
6066 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6067 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6068 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6069 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6070 	},
6071 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6072 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6073 	  0, 0
6074 	},
6075 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6076 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6077 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6078 	},
6079 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6080 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6081 	  0, 0
6082 	},
6083 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6084 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6085 	  0, 0
6086 	},
6087 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6088 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6089 	  0, 0
6090 	},
6091 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6092 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6096 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6097 	  0, 0
6098 	},
6099 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6100 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6101 	  0, 0
6102 	},
6103 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6104 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6105 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6106 	},
6107 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6108 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6109 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6110 	},
6111 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6112 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6113 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6114 	},
6115 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6116 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6117 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6118 	},
6119 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6120 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6121 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6122 	},
6123 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6124 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6125 	  0, 0
6126 	},
6127 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6128 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6129 	  0, 0
6130 	},
6131 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6132 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6133 	  0, 0
6134 	},
6135 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6136 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6137 	  0, 0
6138 	},
6139 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6140 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6141 	  0, 0
6142 	},
6143 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6144 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6145 	  0, 0
6146 	},
6147 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6148 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6149 	  0, 0
6150 	},
6151 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6152 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6153 	  0, 0
6154 	},
6155 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6156 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6157 	  0, 0
6158 	},
6159 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6160 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6161 	  0, 0
6162 	},
6163 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6164 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6165 	  0, 0
6166 	},
6167 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6168 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6169 	  0, 0
6170 	},
6171 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6172 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6173 	  0, 0
6174 	},
6175 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6176 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6177 	  0, 0
6178 	},
6179 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6180 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6181 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6182 	},
6183 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6184 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6185 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6186 	},
6187 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6188 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6189 	  0, 0
6190 	},
6191 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6192 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6193 	  0, 0
6194 	},
6195 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6196 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6197 	  0, 0
6198 	},
6199 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6200 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6201 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6202 	},
6203 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6204 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6205 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6206 	},
6207 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6208 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6209 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6210 	},
6211 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6212 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6213 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6214 	},
6215 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6216 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6217 	  0, 0
6218 	},
6219 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6220 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6221 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6222 	},
6223 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6224 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6225 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6226 	},
6227 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6228 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6229 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6230 	},
6231 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6232 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6233 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6234 	},
6235 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6236 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6237 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6238 	},
6239 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6240 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6241 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6242 	},
6243 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6244 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6245 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6246 	},
6247 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6248 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6249 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6250 	},
6251 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6252 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6253 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6254 	},
6255 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6256 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6257 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6258 	},
6259 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6260 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6261 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6262 	},
6263 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6264 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6265 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6266 	},
6267 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6268 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6270 	},
6271 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6272 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6274 	},
6275 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6276 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6278 	},
6279 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6280 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6281 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6282 	},
6283 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6284 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6285 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6286 	},
6287 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6288 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6289 	  0, 0
6290 	},
6291 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6292 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6293 	  0, 0
6294 	},
6295 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6296 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6297 	  0, 0
6298 	},
6299 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6300 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6301 	  0, 0
6302 	},
6303 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6304 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6305 	  0, 0
6306 	},
6307 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6308 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6309 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6310 	},
6311 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6312 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6313 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6314 	},
6315 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6316 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6317 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6318 	},
6319 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6320 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6321 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6322 	},
6323 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6324 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6325 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6326 	},
6327 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6328 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6329 	  0, 0
6330 	},
6331 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6332 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6333 	  0, 0
6334 	},
6335 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6336 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6337 	  0, 0
6338 	},
6339 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6340 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6341 	  0, 0
6342 	},
6343 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6344 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6345 	  0, 0
6346 	},
6347 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6348 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6350 	},
6351 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6352 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6354 	},
6355 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6356 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6358 	},
6359 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6360 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6361 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6362 	},
6363 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6364 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6365 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6366 	},
6367 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6368 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6369 	  0, 0
6370 	},
6371 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6372 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6373 	  0, 0
6374 	},
6375 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6376 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6377 	  0, 0
6378 	},
6379 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6380 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6381 	  0, 0
6382 	},
6383 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6384 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6385 	  0, 0
6386 	},
6387 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6388 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6389 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6390 	},
6391 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6392 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6393 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6394 	},
6395 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6396 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6397 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6398 	},
6399 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6400 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6401 	  0, 0
6402 	},
6403 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6404 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6405 	  0, 0
6406 	},
6407 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6408 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6409 	  0, 0
6410 	},
6411 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6412 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6413 	  0, 0
6414 	},
6415 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6416 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6417 	  0, 0
6418 	},
6419 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6420 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6421 	  0, 0
6422 	}
6423 };
6424 
6425 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6426 				     void *inject_if)
6427 {
6428 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6429 	int ret;
6430 	struct ta_ras_trigger_error_input block_info = { 0 };
6431 
6432 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6433 		return -EINVAL;
6434 
6435 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6436 		return -EINVAL;
6437 
6438 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6439 		return -EPERM;
6440 
6441 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6442 	      info->head.type)) {
6443 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6444 			ras_gfx_subblocks[info->head.sub_block_index].name,
6445 			info->head.type);
6446 		return -EPERM;
6447 	}
6448 
6449 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6450 	      info->head.type)) {
6451 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6452 			ras_gfx_subblocks[info->head.sub_block_index].name,
6453 			info->head.type);
6454 		return -EPERM;
6455 	}
6456 
6457 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6458 	block_info.sub_block_index =
6459 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6460 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6461 	block_info.address = info->address;
6462 	block_info.value = info->value;
6463 
6464 	mutex_lock(&adev->grbm_idx_mutex);
6465 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6466 	mutex_unlock(&adev->grbm_idx_mutex);
6467 
6468 	return ret;
6469 }
6470 
6471 static const char *vml2_mems[] = {
6472 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6473 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6474 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6475 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6476 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6477 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6478 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6479 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6480 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6481 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6482 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6483 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6484 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6485 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6486 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6487 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6488 };
6489 
6490 static const char *vml2_walker_mems[] = {
6491 	"UTC_VML2_CACHE_PDE0_MEM0",
6492 	"UTC_VML2_CACHE_PDE0_MEM1",
6493 	"UTC_VML2_CACHE_PDE1_MEM0",
6494 	"UTC_VML2_CACHE_PDE1_MEM1",
6495 	"UTC_VML2_CACHE_PDE2_MEM0",
6496 	"UTC_VML2_CACHE_PDE2_MEM1",
6497 	"UTC_VML2_RDIF_LOG_FIFO",
6498 };
6499 
6500 static const char *atc_l2_cache_2m_mems[] = {
6501 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6502 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6503 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6504 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6505 };
6506 
6507 static const char *atc_l2_cache_4k_mems[] = {
6508 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6509 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6510 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6511 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6512 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6513 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6514 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6515 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6516 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6517 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6518 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6519 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6520 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6521 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6522 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6523 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6524 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6525 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6526 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6527 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6528 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6529 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6530 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6531 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6532 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6533 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6534 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6535 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6536 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6537 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6538 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6539 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6540 };
6541 
6542 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6543 					 struct ras_err_data *err_data)
6544 {
6545 	uint32_t i, data;
6546 	uint32_t sec_count, ded_count;
6547 
6548 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6549 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6550 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6551 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6552 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6553 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6554 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6555 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6556 
6557 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6558 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6559 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6560 
6561 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6562 		if (sec_count) {
6563 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6564 				"SEC %d\n", i, vml2_mems[i], sec_count);
6565 			err_data->ce_count += sec_count;
6566 		}
6567 
6568 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6569 		if (ded_count) {
6570 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6571 				"DED %d\n", i, vml2_mems[i], ded_count);
6572 			err_data->ue_count += ded_count;
6573 		}
6574 	}
6575 
6576 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6577 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6578 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6579 
6580 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6581 						SEC_COUNT);
6582 		if (sec_count) {
6583 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6584 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6585 			err_data->ce_count += sec_count;
6586 		}
6587 
6588 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6589 						DED_COUNT);
6590 		if (ded_count) {
6591 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6592 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6593 			err_data->ue_count += ded_count;
6594 		}
6595 	}
6596 
6597 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6598 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6599 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6600 
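		/* The SEC count lives in bits [14:13] of ATC_L2_CACHE_2M_EDC_CNT,
		 * hence the 0x6000 mask and the shift by 13 (0xd).
		 */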
6601 		sec_count = (data & 0x00006000L) >> 0xd;
6602 		if (sec_count) {
6603 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6604 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6605 				sec_count);
6606 			err_data->ce_count += sec_count;
6607 		}
6608 	}
6609 
6610 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6611 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6612 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6613 
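		/* As above: SEC is in bits [14:13] and DED in bits [16:15] of
		 * the ATC_L2_CACHE_4K_EDC_CNT value.
		 */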
6614 		sec_count = (data & 0x00006000L) >> 0xd;
6615 		if (sec_count) {
6616 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6618 				sec_count);
6619 			err_data->ce_count += sec_count;
6620 		}
6621 
6622 		ded_count = (data & 0x00018000L) >> 0xf;
6623 		if (ded_count) {
6624 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6625 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6626 				ded_count);
6627 			err_data->ue_count += ded_count;
6628 		}
6629 	}
6630 
6631 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6632 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6633 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6634 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6635 
6636 	return 0;
6637 }
6638 
6639 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6640 	const struct soc15_reg_entry *reg,
6641 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6642 	uint32_t *sec_count, uint32_t *ded_count)
6643 {
6644 	uint32_t i;
6645 	uint32_t sec_cnt, ded_cnt;
6646 
6647 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6648 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6649 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6650 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6651 			continue;
6652 
6653 		sec_cnt = (value &
6654 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6655 				gfx_v9_0_ras_fields[i].sec_count_shift;
6656 		if (sec_cnt) {
6657 			dev_info(adev->dev, "GFX SubBlock %s, "
6658 				"Instance[%d][%d], SEC %d\n",
6659 				gfx_v9_0_ras_fields[i].name,
6660 				se_id, inst_id,
6661 				sec_cnt);
6662 			*sec_count += sec_cnt;
6663 		}
6664 
6665 		ded_cnt = (value &
6666 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6667 				gfx_v9_0_ras_fields[i].ded_count_shift;
6668 		if (ded_cnt) {
6669 			dev_info(adev->dev, "GFX SubBlock %s, "
6670 				"Instance[%d][%d], DED %d\n",
6671 				gfx_v9_0_ras_fields[i].name,
6672 				se_id, inst_id,
6673 				ded_cnt);
6674 			*ded_count += ded_cnt;
6675 		}
6676 	}
6677 
6678 	return 0;
6679 }
6680 
6681 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6682 {
6683 	int i, j, k;
6684 
6685 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6686 		return;
6687 
6688 	/* read back registers to clear the counters */
6689 	mutex_lock(&adev->grbm_idx_mutex);
6690 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6691 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6692 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6693 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6694 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6695 			}
6696 		}
6697 	}
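	/* 0xe0000000 sets the SE/SH/instance broadcast bits, restoring
	 * GRBM_GFX_INDEX to broadcast mode after the per-instance reads.
	 */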
6698 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6699 	mutex_unlock(&adev->grbm_idx_mutex);
6700 
6701 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6702 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6703 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6704 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6705 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6706 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6707 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6708 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6709 
6710 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6711 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6712 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6713 	}
6714 
6715 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6716 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6717 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6718 	}
6719 
6720 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6721 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6722 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6723 	}
6724 
6725 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6726 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6727 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6728 	}
6729 
6730 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6731 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6732 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6733 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6734 }
6735 
6736 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6737 					  void *ras_error_status)
6738 {
6739 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6740 	uint32_t sec_count = 0, ded_count = 0;
6741 	uint32_t i, j, k;
6742 	uint32_t reg_value;
6743 
6744 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6745 		return -EINVAL;
6746 
6747 	err_data->ue_count = 0;
6748 	err_data->ce_count = 0;
6749 
6750 	mutex_lock(&adev->grbm_idx_mutex);
6751 
6752 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6753 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6754 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6755 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6756 				reg_value =
6757 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6758 				if (reg_value)
6759 					gfx_v9_0_ras_error_count(adev,
6760 						&gfx_v9_0_edc_counter_regs[i],
6761 						j, k, reg_value,
6762 						&sec_count, &ded_count);
6763 			}
6764 		}
6765 	}
6766 
6767 	err_data->ce_count += sec_count;
6768 	err_data->ue_count += ded_count;
6769 
6770 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6771 	mutex_unlock(&adev->grbm_idx_mutex);
6772 
6773 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6774 
6775 	return 0;
6776 }
6777 
6778 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6779 {
6780 	const unsigned int cp_coher_cntl =
6781 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6782 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6783 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6784 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6785 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6786 
6787 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6788 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6789 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6790 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6791 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6792 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6793 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6794 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6795 }
6796 
6797 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6798 					uint32_t pipe, bool enable)
6799 {
6800 	struct amdgpu_device *adev = ring->adev;
6801 	uint32_t val;
6802 	uint32_t wcl_cs_reg;
6803 
6804 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
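	/* When limiting, program the minimum multiplier (0x1) so the pipe is
	 * throttled to a minimal wave allotment; otherwise restore the default.
	 */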
6805 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6806 
6807 	switch (pipe) {
6808 	case 0:
6809 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6810 		break;
6811 	case 1:
6812 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6813 		break;
6814 	case 2:
6815 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6816 		break;
6817 	case 3:
6818 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6819 		break;
6820 	default:
6821 		DRM_DEBUG("invalid pipe %d\n", pipe);
6822 		return;
6823 	}
6824 
6825 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6826 }
6827 
6828 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6829 {
6830 	struct amdgpu_device *adev = ring->adev;
6831 	uint32_t val;
6832 	int i;
6833 
6834 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
6835 	 * the number of gfx waves. Setting the low 5 bits (0x1f out of the 0x7f
6836 	 * maximum) ensures gfx only gets around 25% of the GPU's resources.
6837 	 */
6839 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6840 	amdgpu_ring_emit_wreg(ring,
6841 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6842 			      val);
6843 
6844 	/* Restrict waves for normal/low priority compute queues as well,
6845 	 * to get the best QoS for high priority compute jobs.
6846 	 *
6847 	 * amdgpu controls only the first ME (CS pipes 0-3).
6848 	 */
6849 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6850 		if (i != ring->pipe)
6851 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6853 	}
6854 }
6855 
6856 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6857 	.name = "gfx_v9_0",
6858 	.early_init = gfx_v9_0_early_init,
6859 	.late_init = gfx_v9_0_late_init,
6860 	.sw_init = gfx_v9_0_sw_init,
6861 	.sw_fini = gfx_v9_0_sw_fini,
6862 	.hw_init = gfx_v9_0_hw_init,
6863 	.hw_fini = gfx_v9_0_hw_fini,
6864 	.suspend = gfx_v9_0_suspend,
6865 	.resume = gfx_v9_0_resume,
6866 	.is_idle = gfx_v9_0_is_idle,
6867 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6868 	.soft_reset = gfx_v9_0_soft_reset,
6869 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6870 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6871 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6872 };
6873 
6874 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6875 	.type = AMDGPU_RING_TYPE_GFX,
6876 	.align_mask = 0xff,
6877 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6878 	.support_64bit_ptrs = true,
6879 	.vmhub = AMDGPU_GFXHUB_0,
6880 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6881 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6882 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6883 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6884 		5 +  /* COND_EXEC */
6885 		7 +  /* PIPELINE_SYNC */
6886 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6887 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6888 		2 + /* VM_FLUSH */
6889 		8 +  /* FENCE for VM_FLUSH */
6890 		20 + /* GDS switch */
6891 		4 + /* double SWITCH_BUFFER,
6892 		       the first COND_EXEC jumps to the place just
6893 			   prior to this double SWITCH_BUFFER  */
6894 		5 + /* COND_EXEC */
6895 		7 +	 /*	HDP_flush */
6896 		4 +	 /*	VGT_flush */
6897 		14 + /*	CE_META */
6898 		31 + /*	DE_META */
6899 		3 + /* CNTX_CTRL */
6900 		5 + /* HDP_INVL */
6901 		8 + 8 + /* FENCE x2 */
6902 		2 + /* SWITCH_BUFFER */
6903 		7, /* gfx_v9_0_emit_mem_sync */
6904 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6905 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6906 	.emit_fence = gfx_v9_0_ring_emit_fence,
6907 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6908 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6909 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6910 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6911 	.test_ring = gfx_v9_0_ring_test_ring,
6912 	.test_ib = gfx_v9_0_ring_test_ib,
6913 	.insert_nop = amdgpu_ring_insert_nop,
6914 	.pad_ib = amdgpu_ring_generic_pad_ib,
6915 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6916 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6917 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6918 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6919 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6920 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6921 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6922 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6923 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6924 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6925 };
6926 
6927 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6928 	.type = AMDGPU_RING_TYPE_COMPUTE,
6929 	.align_mask = 0xff,
6930 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6931 	.support_64bit_ptrs = true,
6932 	.vmhub = AMDGPU_GFXHUB_0,
6933 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6934 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6935 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6936 	.emit_frame_size =
6937 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6938 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6939 		5 + /* hdp invalidate */
6940 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6941 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6942 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6943 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6944 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6945 		7 + /* gfx_v9_0_emit_mem_sync */
6946 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6947 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6948 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6949 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6950 	.emit_fence = gfx_v9_0_ring_emit_fence,
6951 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6952 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6953 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6954 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6955 	.test_ring = gfx_v9_0_ring_test_ring,
6956 	.test_ib = gfx_v9_0_ring_test_ib,
6957 	.insert_nop = amdgpu_ring_insert_nop,
6958 	.pad_ib = amdgpu_ring_generic_pad_ib,
6959 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6960 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6961 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6962 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6963 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6964 };
6965 
6966 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6967 	.type = AMDGPU_RING_TYPE_KIQ,
6968 	.align_mask = 0xff,
6969 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6970 	.support_64bit_ptrs = true,
6971 	.vmhub = AMDGPU_GFXHUB_0,
6972 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6973 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6974 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6975 	.emit_frame_size =
6976 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6977 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6978 		5 + /* hdp invalidate */
6979 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6980 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6981 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6982 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6983 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6984 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6985 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6986 	.test_ring = gfx_v9_0_ring_test_ring,
6987 	.insert_nop = amdgpu_ring_insert_nop,
6988 	.pad_ib = amdgpu_ring_generic_pad_ib,
6989 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6990 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6991 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6992 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6993 };
6994 
6995 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6996 {
6997 	int i;
6998 
6999 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7000 
7001 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7002 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7003 
7004 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7005 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7006 }
7007 
7008 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7009 	.set = gfx_v9_0_set_eop_interrupt_state,
7010 	.process = gfx_v9_0_eop_irq,
7011 };
7012 
7013 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7014 	.set = gfx_v9_0_set_priv_reg_fault_state,
7015 	.process = gfx_v9_0_priv_reg_irq,
7016 };
7017 
7018 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7019 	.set = gfx_v9_0_set_priv_inst_fault_state,
7020 	.process = gfx_v9_0_priv_inst_irq,
7021 };
7022 
7023 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7024 	.set = gfx_v9_0_set_cp_ecc_error_state,
7025 	.process = amdgpu_gfx_cp_ecc_error_irq,
7026 };
7027 
7028 
7029 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7030 {
7031 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7032 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7033 
7034 	adev->gfx.priv_reg_irq.num_types = 1;
7035 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7036 
7037 	adev->gfx.priv_inst_irq.num_types = 1;
7038 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7039 
7040 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7041 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7042 }
7043 
7044 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7045 {
7046 	switch (adev->ip_versions[GC_HWIP][0]) {
7047 	case IP_VERSION(9, 0, 1):
7048 	case IP_VERSION(9, 2, 1):
7049 	case IP_VERSION(9, 4, 0):
7050 	case IP_VERSION(9, 2, 2):
7051 	case IP_VERSION(9, 1, 0):
7052 	case IP_VERSION(9, 4, 1):
7053 	case IP_VERSION(9, 3, 0):
7054 	case IP_VERSION(9, 4, 2):
7055 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7056 		break;
7057 	default:
7058 		break;
7059 	}
7060 }
7061 
7062 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7063 {
7064 	/* init ASIC GDS info */
7065 	switch (adev->ip_versions[GC_HWIP][0]) {
7066 	case IP_VERSION(9, 0, 1):
7067 	case IP_VERSION(9, 2, 1):
7068 	case IP_VERSION(9, 4, 0):
7069 		adev->gds.gds_size = 0x10000;
7070 		break;
7071 	case IP_VERSION(9, 2, 2):
7072 	case IP_VERSION(9, 1, 0):
7073 	case IP_VERSION(9, 4, 1):
7074 		adev->gds.gds_size = 0x1000;
7075 		break;
7076 	case IP_VERSION(9, 4, 2):
7077 		/* Aldebaran removed all of the GDS internal memory;
7078 		 * the kernel only supports GWS opcodes such as barrier
7079 		 * and semaphore. */
7080 		adev->gds.gds_size = 0;
7081 		break;
7082 	default:
7083 		adev->gds.gds_size = 0x10000;
7084 		break;
7085 	}
7086 
7087 	switch (adev->ip_versions[GC_HWIP][0]) {
7088 	case IP_VERSION(9, 0, 1):
7089 	case IP_VERSION(9, 4, 0):
7090 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7091 		break;
7092 	case IP_VERSION(9, 2, 1):
7093 		adev->gds.gds_compute_max_wave_id = 0x27f;
7094 		break;
7095 	case IP_VERSION(9, 2, 2):
7096 	case IP_VERSION(9, 1, 0):
7097 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7098 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7099 		else
7100 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7101 		break;
7102 	case IP_VERSION(9, 4, 1):
7103 		adev->gds.gds_compute_max_wave_id = 0xfff;
7104 		break;
7105 	case IP_VERSION(9, 4, 2):
7106 		/* deprecated for Aldebaran, no usage at all */
7107 		adev->gds.gds_compute_max_wave_id = 0;
7108 		break;
7109 	default:
7110 		/* this really depends on the chip */
7111 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7112 		break;
7113 	}
7114 
7115 	adev->gds.gws_size = 64;
7116 	adev->gds.oa_size = 16;
7117 }
7118 
7119 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7120 						 u32 bitmap)
7121 {
7122 	u32 data;
7123 
7124 	if (!bitmap)
7125 		return;
7126 
7127 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7128 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7129 
7130 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7131 }
7132 
7133 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7134 {
7135 	u32 data, mask;
7136 
7137 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7138 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7139 
7140 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7141 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7142 
7143 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7144 
7145 	return (~data) & mask;
7146 }
7147 
7148 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7149 				 struct amdgpu_cu_info *cu_info)
7150 {
7151 	int i, j, k, counter, active_cu_number = 0;
7152 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7153 	unsigned disable_masks[4 * 4];
7154 
7155 	if (!adev || !cu_info)
7156 		return -EINVAL;
7157 
7158 	/*
7159 	 * The limit of 16 comes from the 4*4 bitmap array size, which covers
7160 	 * all gfx9 ASICs.
7161 	 */
7161 	if (adev->gfx.config.max_shader_engines *
7162 		adev->gfx.config.max_sh_per_se > 16)
7163 		return -EINVAL;
7164 
7165 	amdgpu_gfx_parse_disable_cu(disable_masks,
7166 				    adev->gfx.config.max_shader_engines,
7167 				    adev->gfx.config.max_sh_per_se);
7168 
7169 	mutex_lock(&adev->grbm_idx_mutex);
7170 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7171 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7172 			mask = 1;
7173 			ao_bitmap = 0;
7174 			counter = 0;
7175 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7176 			gfx_v9_0_set_user_cu_inactive_bitmap(
7177 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7178 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7179 
7180 			/*
7181 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
7182 			 * is a 4x4 array, which suits Vega ASICs with their
7183 			 * 4*2 SE/SH layout.
7184 			 * For Arcturus, however, the SE/SH layout changed to 8*1.
7185 			 * To minimize the impact, we map it onto the existing
7186 			 * bitmap array as below:
7187 			 *    SE4,SH0 --> bitmap[0][1]
7188 			 *    SE5,SH0 --> bitmap[1][1]
7189 			 *    SE6,SH0 --> bitmap[2][1]
7190 			 *    SE7,SH0 --> bitmap[3][1]
7191 			 */
7192 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7193 
7194 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7195 				if (bitmap & mask) {
7196 					if (counter < adev->gfx.config.max_cu_per_sh)
7197 						ao_bitmap |= mask;
7198 					counter ++;
7199 				}
7200 				mask <<= 1;
7201 			}
7202 			active_cu_number += counter;
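			/* ao_cu_mask only has room for the first 2 SEs x 2 SHs
			 * (16 bits per SE, 8 bits per SH), hence the i/j bound.
			 */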
7203 			if (i < 2 && j < 2)
7204 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7205 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7206 		}
7207 	}
7208 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7209 	mutex_unlock(&adev->grbm_idx_mutex);
7210 
7211 	cu_info->number = active_cu_number;
7212 	cu_info->ao_cu_mask = ao_cu_mask;
7213 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7214 
7215 	return 0;
7216 }
7217 
7218 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7219 {
7220 	.type = AMD_IP_BLOCK_TYPE_GFX,
7221 	.major = 9,
7222 	.minor = 0,
7223 	.rev = 0,
7224 	.funcs = &gfx_v9_0_ip_funcs,
7225 };
7226