xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 23cb0767f0544858169c02cec445d066d4e02e2b)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
/* Driver-local GFX9 constants. */
#define GFX9_NUM_GFX_RINGS			1
#define GFX9_MEC_HPD_SIZE			4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

/*
 * GCEA_PROBE_MAP register offset and base index, defined locally in this
 * file; referenced by the golden_settings_gc_9_1_rn table.
 */
#define mmGCEA_PROBE_MAP			0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX		0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
/*
 * TCP_CHAN_STEER_0..5 register offsets carrying the _ARCT suffix, each with
 * its base index. Note the offsets are not contiguous (0x0b03, 0x0b04, then
 * 0x0b09..0x0b0c).
 */
#define mmTCP_CHAN_STEER_0_ARCT			0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_1_ARCT			0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_2_ARCT			0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_3_ARCT			0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_4_ARCT			0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_5_ARCT			0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX	0

/* GOLDEN_TSC_COUNT upper/lower register offsets with the _Renoir suffix. */
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir		0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX	1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir		0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX	1
147 
/*
 * GFX RAS sub-block identifiers.
 *
 * Each hardware unit occupies a contiguous index range bracketed by
 * *_INDEX_START / *_INDEX_END aliases; the first real member of a range
 * shares the START value and the END alias repeats the last member, so the
 * aliases never consume an index of their own. SQC, TCC and EA are further
 * split into numbered sub-ranges. TA_RAS_BLOCK__GFX_MAX is one past the last
 * valid identifier.
 */
enum ta_ras_gfx_subblock {
	/* CPC */
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,

	/* CPF */
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,

	/* CPG */
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,

	/* GDS */
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,

	/* SPI */
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,

	/* SQ */
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,

	/* SQC (3 ranges) */
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END = TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END = TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END = TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,

	/* TA */
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,

	/* TCA */
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,

	/* TCC (5 sub-ranges) */
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END = TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END = TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END = TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END = TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,

	/* TCI */
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,

	/* TCP */
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,

	/* TD */
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,

	/* EA (3 sub-ranges) */
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,

	/* UTC VM L2 bank */
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,

	/* Number of sub-block identifiers; not a valid sub-block itself. */
	TA_RAS_BLOCK__GFX_MAX
};
349 
/*
 * One row of the GFX RAS sub-block table.
 *
 * Field order matters: table rows built with AMDGPU_RAS_SUB_BLOCK() fill
 * these members in declaration order.
 *
 * @name:                    sub-block name; the table macro fills it with a
 *                           string literal, hence a pointer to const char
 * @ta_subblock:             matching TA_RAS_BLOCK__* identifier
 * @hw_supported_error_type: bitmask assembled by the table macro from its
 *                           first four flag arguments
 * @sw_supported_error_type: bitmask assembled by the table macro from its
 *                           last four flag arguments
 */
struct ras_gfx_subblock {
	const char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
356 
/*
 * Build one designated entry of a struct ras_gfx_subblock table.
 *
 * The entry is placed at index AMDGPU_RAS_BLOCK__<subblock>, named with the
 * stringified <subblock>, and tagged with TA_RAS_BLOCK__<subblock>.
 *
 * a, b, c, d land in bits 0, 1, 2, 3 of hw_supported_error_type.
 * g, e, h, f land in bits 0, 1, 2, 3 of sw_supported_error_type; note that
 * the argument order (e, f, g, h) is NOT the bit order.
 * NOTE(review): the meaning of each bit is not visible here; presumably it
 * follows the RAS error-type flags in amdgpu_ras.h -- confirm.
 */
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)		\
	[AMDGPU_RAS_BLOCK__##subblock] = {				\
		.name = #subblock,					\
		.ta_subblock = TA_RAS_BLOCK__##subblock,		\
		.hw_supported_error_type =				\
			((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		.sw_supported_error_type =				\
			((g) | ((e) << 1) | ((h) << 2) | ((f) << 3)),	\
	}
364 
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 			     0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 			     1),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 			     0, 0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 			     0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 			     0, 0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 			     0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 			     0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 			     0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 			     0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514 
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538 
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560 
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575 
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603 
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614 
/*
 * Golden register settings for the rv2 (Raven2) flavour of GC 9.1.
 *
 * gfx_v9_0_init_golden_registers() programs this table, right after
 * golden_settings_gc_9_1, for GC IP versions 9.1.0 and 9.2.2 when
 * AMD_APU_IS_RAVEN2 is set in adev->apu_flags.
 *
 * Each entry names a GC register (instance 0) followed by two 32-bit
 * values; presumably a bit mask and the value applied under that mask --
 * confirm against the SOC15_REG_GOLDEN_VALUE definition.
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
637 
/*
 * Golden register settings for Renoir (the "rn" table).
 *
 * gfx_v9_0_init_golden_registers() programs this table for GC IP version
 * 9.3.0 and then returns immediately, so golden_settings_gc_9_x_common is
 * NOT applied on top of it.
 *
 * Each entry names a GC register (instance 0) followed by two 32-bit
 * values; presumably a bit mask and the value applied under that mask --
 * confirm against the SOC15_REG_GOLDEN_VALUE definition.
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};
653 
/*
 * Golden register settings shared by most GC 9.x parts.
 *
 * gfx_v9_0_init_golden_registers() programs this table last, after the
 * per-ASIC tables, for every GC IP version except 9.4.1 and 9.4.2 (which
 * are excluded explicitly) and 9.3.0 (which returns before reaching it).
 *
 * NOTE(review): GRBM_CAM_INDEX is listed before GRBM_CAM_DATA; this looks
 * like an index/data register pair where order matters -- do not reorder
 * without confirming.
 */
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
660 
/*
 * Base golden register settings for GC 9.2.1.
 *
 * gfx_v9_0_init_golden_registers() programs this table first for GC IP
 * version 9.2.1, followed by golden_settings_gc_9_2_1_vg12.
 *
 * Each entry names a GC register (instance 0) followed by two 32-bit
 * values; presumably a bit mask and the value applied under that mask --
 * confirm against the SOC15_REG_GOLDEN_VALUE definition.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
680 
/*
 * Additional golden register settings for GC 9.2.1 (the "vg12" table).
 *
 * gfx_v9_0_init_golden_registers() programs this table for GC IP version
 * 9.2.1, after golden_settings_gc_9_2_1. Registers that appear in both
 * tables (e.g. mmTCP_CHAN_STEER_HI/LO) therefore end up with the values
 * written from this one.
 *
 * The mmGB_ADDR_CONFIG value here equals VEGA12_GB_ADDR_CONFIG_GOLDEN.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
697 
/*
 * Golden register settings for GC 9.4.1 (the "arct" table).
 *
 * gfx_v9_0_init_golden_registers() programs only this table for GC IP
 * version 9.4.1; golden_settings_gc_9_x_common is explicitly skipped for
 * that version.
 *
 * The mmGB_ADDR_CONFIG value here equals VEGA10_GB_ADDR_CONFIG_GOLDEN.
 * This part uses the mmTCP_CHAN_STEER_{0..5}_ARCT registers instead of the
 * mmTCP_CHAN_STEER_HI/LO pair found in the other tables.
 */
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};
712 
/*
 * List of GC registers (instance 0) described as soc15_reg_rlcg entries:
 * mmGRBM_GFX_INDEX and mmSQ_IND_INDEX.
 *
 * NOTE(review): judging by the name, these are presumably the registers
 * that must be accessed through the RLCG interface; the consumer of this
 * table is not visible here -- confirm at the point of use.
 */
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};
717 
/*
 * Offsets of the eight mmRLC_SRM_INDEX_CNTL_ADDR_n registers (n = 0..7),
 * each expressed relative to mmRLC_SRM_INDEX_CNTL_ADDR_0. Entry 0 is
 * therefore always zero; adding entry n to the address of ADDR_0 yields
 * the address of ADDR_n.
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
729 
/*
 * Offsets of the eight mmRLC_SRM_INDEX_CNTL_DATA_n registers (n = 0..7),
 * each expressed relative to mmRLC_SRM_INDEX_CNTL_DATA_0. Entry 0 is
 * therefore always zero; adding entry n to the address of DATA_0 yields
 * the address of DATA_n.
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
741 
/*
 * Golden GB_ADDR_CONFIG values per ASIC. They match the mmGB_ADDR_CONFIG
 * values used in the golden tables above:
 *   VEGA10 -> golden_settings_gc_9_4_1_arct
 *   VEGA12 -> golden_settings_gc_9_2_1_vg12
 *   RAVEN  -> golden_settings_gc_9_1_rv1 and golden_settings_gc_9_1_rn
 *   RAVEN2 -> golden_settings_gc_9_1_rv2
 * Keep the two in sync when changing either.
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
746 
/*
 * Prototypes for file-local (static) functions, declared here so they can
 * be referenced ahead of their definitions.
 */

/* Setup hooks: ring, IRQ, GDS and RLC function/parameter installation. */
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
/* Queries. */
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
/* Ring helpers. */
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
/* RAS error counting and injection. */
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
761 
762 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
763 				uint64_t queue_mask)
764 {
765 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
766 	amdgpu_ring_write(kiq_ring,
767 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
768 		/* vmid_mask:0* queue_type:0 (KIQ) */
769 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
770 	amdgpu_ring_write(kiq_ring,
771 			lower_32_bits(queue_mask));	/* queue mask lo */
772 	amdgpu_ring_write(kiq_ring,
773 			upper_32_bits(queue_mask));	/* queue mask hi */
774 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
775 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
776 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
777 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
778 }
779 
780 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
781 				 struct amdgpu_ring *ring)
782 {
783 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
784 	uint64_t wptr_addr = ring->wptr_gpu_addr;
785 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
786 
787 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
788 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
789 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
790 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
791 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
792 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
793 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
794 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
795 			 /*queue_type: normal compute queue */
796 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
797 			 /* alloc format: all_on_one_pipe */
798 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
799 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
800 			 /* num_queues: must be 1 */
801 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
802 	amdgpu_ring_write(kiq_ring,
803 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
804 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
805 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
806 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
807 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
808 }
809 
810 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
811 				   struct amdgpu_ring *ring,
812 				   enum amdgpu_unmap_queues_action action,
813 				   u64 gpu_addr, u64 seq)
814 {
815 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
816 
817 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
818 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
819 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
820 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
821 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
822 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
823 	amdgpu_ring_write(kiq_ring,
824 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
825 
826 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
827 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
828 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
829 		amdgpu_ring_write(kiq_ring, seq);
830 	} else {
831 		amdgpu_ring_write(kiq_ring, 0);
832 		amdgpu_ring_write(kiq_ring, 0);
833 		amdgpu_ring_write(kiq_ring, 0);
834 	}
835 }
836 
837 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
838 				   struct amdgpu_ring *ring,
839 				   u64 addr,
840 				   u64 seq)
841 {
842 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
843 
844 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
845 	amdgpu_ring_write(kiq_ring,
846 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
847 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
848 			  PACKET3_QUERY_STATUS_COMMAND(2));
849 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
850 	amdgpu_ring_write(kiq_ring,
851 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
852 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
853 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
854 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
855 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
856 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
857 }
858 
859 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
860 				uint16_t pasid, uint32_t flush_type,
861 				bool all_hub)
862 {
863 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
864 	amdgpu_ring_write(kiq_ring,
865 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
866 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
867 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
868 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
869 }
870 
/*
 * KIQ PM4 packet emitters for gfx v9, installed into adev->gfx.kiq.pmf by
 * gfx_v9_0_set_kiq_pm4_funcs().
 *
 * Each *_size field is the number of dwords the matching emitter writes
 * with amdgpu_ring_write(); keep them in sync when changing an emitter.
 */
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,	/* header + 7 payload dwords */
	.map_queues_size = 7,		/* header + 6 payload dwords */
	.unmap_queues_size = 6,		/* header + 5 payload dwords */
	.query_status_size = 7,		/* header + 6 payload dwords */
	.invalidate_tlbs_size = 2,	/* header + 1 payload dword */
};
883 
884 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
885 {
886 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
887 }
888 
889 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
890 {
891 	switch (adev->ip_versions[GC_HWIP][0]) {
892 	case IP_VERSION(9, 0, 1):
893 		soc15_program_register_sequence(adev,
894 						golden_settings_gc_9_0,
895 						ARRAY_SIZE(golden_settings_gc_9_0));
896 		soc15_program_register_sequence(adev,
897 						golden_settings_gc_9_0_vg10,
898 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
899 		break;
900 	case IP_VERSION(9, 2, 1):
901 		soc15_program_register_sequence(adev,
902 						golden_settings_gc_9_2_1,
903 						ARRAY_SIZE(golden_settings_gc_9_2_1));
904 		soc15_program_register_sequence(adev,
905 						golden_settings_gc_9_2_1_vg12,
906 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
907 		break;
908 	case IP_VERSION(9, 4, 0):
909 		soc15_program_register_sequence(adev,
910 						golden_settings_gc_9_0,
911 						ARRAY_SIZE(golden_settings_gc_9_0));
912 		soc15_program_register_sequence(adev,
913 						golden_settings_gc_9_0_vg20,
914 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
915 		break;
916 	case IP_VERSION(9, 4, 1):
917 		soc15_program_register_sequence(adev,
918 						golden_settings_gc_9_4_1_arct,
919 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
920 		break;
921 	case IP_VERSION(9, 2, 2):
922 	case IP_VERSION(9, 1, 0):
923 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
924 						ARRAY_SIZE(golden_settings_gc_9_1));
925 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
926 			soc15_program_register_sequence(adev,
927 							golden_settings_gc_9_1_rv2,
928 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
929 		else
930 			soc15_program_register_sequence(adev,
931 							golden_settings_gc_9_1_rv1,
932 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
933 		break;
934 	 case IP_VERSION(9, 3, 0):
935 		soc15_program_register_sequence(adev,
936 						golden_settings_gc_9_1_rn,
937 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
938 		return; /* for renoir, don't need common goldensetting */
939 	case IP_VERSION(9, 4, 2):
940 		gfx_v9_4_2_init_golden_registers(adev,
941 						 adev->smuio.funcs->get_die_id(adev));
942 		break;
943 	default:
944 		break;
945 	}
946 
947 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
948 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
949 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
950 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
951 }
952 
953 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
954 				       bool wc, uint32_t reg, uint32_t val)
955 {
956 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
957 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
958 				WRITE_DATA_DST_SEL(0) |
959 				(wc ? WR_CONFIRM : 0));
960 	amdgpu_ring_write(ring, reg);
961 	amdgpu_ring_write(ring, 0);
962 	amdgpu_ring_write(ring, val);
963 }
964 
965 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
966 				  int mem_space, int opt, uint32_t addr0,
967 				  uint32_t addr1, uint32_t ref, uint32_t mask,
968 				  uint32_t inv)
969 {
970 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
971 	amdgpu_ring_write(ring,
972 				 /* memory (1) or register (0) */
973 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
974 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
975 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
976 				 WAIT_REG_MEM_ENGINE(eng_sel)));
977 
978 	if (mem_space)
979 		BUG_ON(addr0 & 0x3); /* Dword align */
980 	amdgpu_ring_write(ring, addr0);
981 	amdgpu_ring_write(ring, addr1);
982 	amdgpu_ring_write(ring, ref);
983 	amdgpu_ring_write(ring, mask);
984 	amdgpu_ring_write(ring, inv); /* poll interval */
985 }
986 
987 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
988 {
989 	struct amdgpu_device *adev = ring->adev;
990 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
991 	uint32_t tmp = 0;
992 	unsigned i;
993 	int r;
994 
995 	WREG32(scratch, 0xCAFEDEAD);
996 	r = amdgpu_ring_alloc(ring, 3);
997 	if (r)
998 		return r;
999 
1000 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1001 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1002 	amdgpu_ring_write(ring, 0xDEADBEEF);
1003 	amdgpu_ring_commit(ring);
1004 
1005 	for (i = 0; i < adev->usec_timeout; i++) {
1006 		tmp = RREG32(scratch);
1007 		if (tmp == 0xDEADBEEF)
1008 			break;
1009 		udelay(1);
1010 	}
1011 
1012 	if (i >= adev->usec_timeout)
1013 		r = -ETIMEDOUT;
1014 	return r;
1015 }
1016 
1017 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1018 {
1019 	struct amdgpu_device *adev = ring->adev;
1020 	struct amdgpu_ib ib;
1021 	struct dma_fence *f = NULL;
1022 
1023 	unsigned index;
1024 	uint64_t gpu_addr;
1025 	uint32_t tmp;
1026 	long r;
1027 
1028 	r = amdgpu_device_wb_get(adev, &index);
1029 	if (r)
1030 		return r;
1031 
1032 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1033 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1034 	memset(&ib, 0, sizeof(ib));
1035 	r = amdgpu_ib_get(adev, NULL, 16,
1036 					AMDGPU_IB_POOL_DIRECT, &ib);
1037 	if (r)
1038 		goto err1;
1039 
1040 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1041 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1042 	ib.ptr[2] = lower_32_bits(gpu_addr);
1043 	ib.ptr[3] = upper_32_bits(gpu_addr);
1044 	ib.ptr[4] = 0xDEADBEEF;
1045 	ib.length_dw = 5;
1046 
1047 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1048 	if (r)
1049 		goto err2;
1050 
1051 	r = dma_fence_wait_timeout(f, false, timeout);
1052 	if (r == 0) {
1053 		r = -ETIMEDOUT;
1054 		goto err2;
1055 	} else if (r < 0) {
1056 		goto err2;
1057 	}
1058 
1059 	tmp = adev->wb.wb[index];
1060 	if (tmp == 0xDEADBEEF)
1061 		r = 0;
1062 	else
1063 		r = -EINVAL;
1064 
1065 err2:
1066 	amdgpu_ib_free(adev, &ib, NULL);
1067 	dma_fence_put(f);
1068 err1:
1069 	amdgpu_device_wb_free(adev, index);
1070 	return r;
1071 }
1072 
1073 
1074 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1075 {
1076 	release_firmware(adev->gfx.pfp_fw);
1077 	adev->gfx.pfp_fw = NULL;
1078 	release_firmware(adev->gfx.me_fw);
1079 	adev->gfx.me_fw = NULL;
1080 	release_firmware(adev->gfx.ce_fw);
1081 	adev->gfx.ce_fw = NULL;
1082 	release_firmware(adev->gfx.rlc_fw);
1083 	adev->gfx.rlc_fw = NULL;
1084 	release_firmware(adev->gfx.mec_fw);
1085 	adev->gfx.mec_fw = NULL;
1086 	release_firmware(adev->gfx.mec2_fw);
1087 	adev->gfx.mec2_fw = NULL;
1088 
1089 	kfree(adev->gfx.rlc.register_list_format);
1090 }
1091 
1092 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1093 {
1094 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1095 
1096 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1097 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1098 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1099 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1100 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1101 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1102 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1103 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1104 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1105 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1106 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1107 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1108 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1109 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1110 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1111 }
1112 
1113 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1114 {
1115 	adev->gfx.me_fw_write_wait = false;
1116 	adev->gfx.mec_fw_write_wait = false;
1117 
1118 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1119 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1120 	    (adev->gfx.mec_feature_version < 46) ||
1121 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1122 	    (adev->gfx.pfp_feature_version < 46)))
1123 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1124 
1125 	switch (adev->ip_versions[GC_HWIP][0]) {
1126 	case IP_VERSION(9, 0, 1):
1127 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1128 		    (adev->gfx.me_feature_version >= 42) &&
1129 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1130 		    (adev->gfx.pfp_feature_version >= 42))
1131 			adev->gfx.me_fw_write_wait = true;
1132 
1133 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1134 		    (adev->gfx.mec_feature_version >= 42))
1135 			adev->gfx.mec_fw_write_wait = true;
1136 		break;
1137 	case IP_VERSION(9, 2, 1):
1138 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1139 		    (adev->gfx.me_feature_version >= 44) &&
1140 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1141 		    (adev->gfx.pfp_feature_version >= 44))
1142 			adev->gfx.me_fw_write_wait = true;
1143 
1144 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1145 		    (adev->gfx.mec_feature_version >= 44))
1146 			adev->gfx.mec_fw_write_wait = true;
1147 		break;
1148 	case IP_VERSION(9, 4, 0):
1149 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1150 		    (adev->gfx.me_feature_version >= 44) &&
1151 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1152 		    (adev->gfx.pfp_feature_version >= 44))
1153 			adev->gfx.me_fw_write_wait = true;
1154 
1155 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1156 		    (adev->gfx.mec_feature_version >= 44))
1157 			adev->gfx.mec_fw_write_wait = true;
1158 		break;
1159 	case IP_VERSION(9, 1, 0):
1160 	case IP_VERSION(9, 2, 2):
1161 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1162 		    (adev->gfx.me_feature_version >= 42) &&
1163 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1164 		    (adev->gfx.pfp_feature_version >= 42))
1165 			adev->gfx.me_fw_write_wait = true;
1166 
1167 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1168 		    (adev->gfx.mec_feature_version >= 42))
1169 			adev->gfx.mec_fw_write_wait = true;
1170 		break;
1171 	default:
1172 		adev->gfx.me_fw_write_wait = true;
1173 		adev->gfx.mec_fw_write_wait = true;
1174 		break;
1175 	}
1176 }
1177 
/*
 * One entry of the GFXOFF quirk list: the full PCI identity of a board on
 * which GFXOFF must be disabled. gfx_v9_0_should_disable_gfxoff() requires
 * all five fields to match the probed device.
 */
struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;	/* compared against pdev->vendor */
	u16 chip_device;	/* compared against pdev->device; 0 ends the list */
	u16 subsys_vendor;	/* compared against pdev->subsystem_vendor */
	u16 subsys_device;	/* compared against pdev->subsystem_device */
	u8 revision;		/* compared against pdev->revision */
};
1185 
/*
 * Boards on which GFXOFF is disabled; consulted by
 * gfx_v9_0_should_disable_gfxoff().
 *
 * Fields are, in order: chip vendor, chip device, subsystem vendor,
 * subsystem device, revision. The all-zero entry terminates the list and
 * must stay last (the lookup stops at chip_device == 0).
 */
static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
	/* terminator */
	{ 0, 0, 0, 0, 0 },
};
1197 
1198 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1199 {
1200 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1201 
1202 	while (p && p->chip_device != 0) {
1203 		if (pdev->vendor == p->chip_vendor &&
1204 		    pdev->device == p->chip_device &&
1205 		    pdev->subsystem_vendor == p->subsys_vendor &&
1206 		    pdev->subsystem_device == p->subsys_device &&
1207 		    pdev->revision == p->revision) {
1208 			return true;
1209 		}
1210 		++p;
1211 	}
1212 	return false;
1213 }
1214 
1215 static bool is_raven_kicker(struct amdgpu_device *adev)
1216 {
1217 	if (adev->pm.fw_version >= 0x41e2b)
1218 		return true;
1219 	else
1220 		return false;
1221 }
1222 
1223 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1224 {
1225 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1226 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1227 	    (adev->gfx.me_feature_version >= 52))
1228 		return true;
1229 	else
1230 		return false;
1231 }
1232 
1233 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1234 {
1235 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1236 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1237 
1238 	switch (adev->ip_versions[GC_HWIP][0]) {
1239 	case IP_VERSION(9, 0, 1):
1240 	case IP_VERSION(9, 2, 1):
1241 	case IP_VERSION(9, 4, 0):
1242 		break;
1243 	case IP_VERSION(9, 2, 2):
1244 	case IP_VERSION(9, 1, 0):
1245 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1246 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1247 		    ((!is_raven_kicker(adev) &&
1248 		      adev->gfx.rlc_fw_version < 531) ||
1249 		     (adev->gfx.rlc_feature_version < 1) ||
1250 		     !adev->gfx.rlc.is_rlc_v2_1))
1251 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1252 
1253 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1254 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1255 				AMD_PG_SUPPORT_CP |
1256 				AMD_PG_SUPPORT_RLC_SMU_HS;
1257 		break;
1258 	case IP_VERSION(9, 3, 0):
1259 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1260 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1261 				AMD_PG_SUPPORT_CP |
1262 				AMD_PG_SUPPORT_RLC_SMU_HS;
1263 		break;
1264 	default:
1265 		break;
1266 	}
1267 }
1268 
1269 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1270 					  const char *chip_name)
1271 {
1272 	char fw_name[30];
1273 	int err;
1274 	struct amdgpu_firmware_info *info = NULL;
1275 	const struct common_firmware_header *header = NULL;
1276 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1277 
1278 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1279 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1280 	if (err)
1281 		goto out;
1282 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1283 	if (err)
1284 		goto out;
1285 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1286 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1287 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1288 
1289 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1290 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1291 	if (err)
1292 		goto out;
1293 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1294 	if (err)
1295 		goto out;
1296 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1297 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1298 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1299 
1300 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1301 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1302 	if (err)
1303 		goto out;
1304 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1305 	if (err)
1306 		goto out;
1307 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1308 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1309 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1310 
1311 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1312 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1313 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1314 		info->fw = adev->gfx.pfp_fw;
1315 		header = (const struct common_firmware_header *)info->fw->data;
1316 		adev->firmware.fw_size +=
1317 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1318 
1319 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1320 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1321 		info->fw = adev->gfx.me_fw;
1322 		header = (const struct common_firmware_header *)info->fw->data;
1323 		adev->firmware.fw_size +=
1324 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1325 
1326 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1327 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1328 		info->fw = adev->gfx.ce_fw;
1329 		header = (const struct common_firmware_header *)info->fw->data;
1330 		adev->firmware.fw_size +=
1331 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1332 	}
1333 
1334 out:
1335 	if (err) {
1336 		dev_err(adev->dev,
1337 			"gfx9: Failed to load firmware \"%s\"\n",
1338 			fw_name);
1339 		release_firmware(adev->gfx.pfp_fw);
1340 		adev->gfx.pfp_fw = NULL;
1341 		release_firmware(adev->gfx.me_fw);
1342 		adev->gfx.me_fw = NULL;
1343 		release_firmware(adev->gfx.ce_fw);
1344 		adev->gfx.ce_fw = NULL;
1345 	}
1346 	return err;
1347 }
1348 
1349 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1350 					  const char *chip_name)
1351 {
1352 	char fw_name[30];
1353 	int err;
1354 	struct amdgpu_firmware_info *info = NULL;
1355 	const struct common_firmware_header *header = NULL;
1356 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1357 	unsigned int *tmp = NULL;
1358 	unsigned int i = 0;
1359 	uint16_t version_major;
1360 	uint16_t version_minor;
1361 	uint32_t smu_version;
1362 
1363 	/*
1364 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1365 	 * instead of picasso_rlc.bin.
1366 	 * Judgment method:
1367 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1368 	 *          or revision >= 0xD8 && revision <= 0xDF
1369 	 * otherwise is PCO FP5
1370 	 */
1371 	if (!strcmp(chip_name, "picasso") &&
1372 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1373 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1374 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1375 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1376 		(smu_version >= 0x41e2b))
1377 		/**
1378 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1379 		*/
1380 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1381 	else
1382 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1383 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1384 	if (err)
1385 		goto out;
1386 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1387 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1388 
1389 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1390 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1391 	if (version_major == 2 && version_minor == 1)
1392 		adev->gfx.rlc.is_rlc_v2_1 = true;
1393 
1394 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1395 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1396 	adev->gfx.rlc.save_and_restore_offset =
1397 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1398 	adev->gfx.rlc.clear_state_descriptor_offset =
1399 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1400 	adev->gfx.rlc.avail_scratch_ram_locations =
1401 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1402 	adev->gfx.rlc.reg_restore_list_size =
1403 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1404 	adev->gfx.rlc.reg_list_format_start =
1405 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1406 	adev->gfx.rlc.reg_list_format_separate_start =
1407 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1408 	adev->gfx.rlc.starting_offsets_start =
1409 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1410 	adev->gfx.rlc.reg_list_format_size_bytes =
1411 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1412 	adev->gfx.rlc.reg_list_size_bytes =
1413 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1414 	adev->gfx.rlc.register_list_format =
1415 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1416 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1417 	if (!adev->gfx.rlc.register_list_format) {
1418 		err = -ENOMEM;
1419 		goto out;
1420 	}
1421 
1422 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1423 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1424 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1425 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1426 
1427 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1428 
1429 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1430 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1431 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1432 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1433 
1434 	if (adev->gfx.rlc.is_rlc_v2_1)
1435 		gfx_v9_0_init_rlc_ext_microcode(adev);
1436 
1437 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1438 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1439 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1440 		info->fw = adev->gfx.rlc_fw;
1441 		header = (const struct common_firmware_header *)info->fw->data;
1442 		adev->firmware.fw_size +=
1443 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1444 
1445 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1446 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1447 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1448 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1449 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1450 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1451 			info->fw = adev->gfx.rlc_fw;
1452 			adev->firmware.fw_size +=
1453 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1454 
1455 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1456 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1457 			info->fw = adev->gfx.rlc_fw;
1458 			adev->firmware.fw_size +=
1459 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1460 
1461 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1462 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1463 			info->fw = adev->gfx.rlc_fw;
1464 			adev->firmware.fw_size +=
1465 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1466 		}
1467 	}
1468 
1469 out:
1470 	if (err) {
1471 		dev_err(adev->dev,
1472 			"gfx9: Failed to load firmware \"%s\"\n",
1473 			fw_name);
1474 		release_firmware(adev->gfx.rlc_fw);
1475 		adev->gfx.rlc_fw = NULL;
1476 	}
1477 	return err;
1478 }
1479 
1480 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1481 {
1482 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1483 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1484 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1485 		return false;
1486 
1487 	return true;
1488 }
1489 
1490 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1491 					  const char *chip_name)
1492 {
1493 	char fw_name[30];
1494 	int err;
1495 	struct amdgpu_firmware_info *info = NULL;
1496 	const struct common_firmware_header *header = NULL;
1497 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1498 
1499 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1500 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1501 	if (err)
1502 		goto out;
1503 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1504 	if (err)
1505 		goto out;
1506 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1507 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1508 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1509 
1510 
1511 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1512 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1513 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1514 		if (!err) {
1515 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1516 			if (err)
1517 				goto out;
1518 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1519 			adev->gfx.mec2_fw->data;
1520 			adev->gfx.mec2_fw_version =
1521 			le32_to_cpu(cp_hdr->header.ucode_version);
1522 			adev->gfx.mec2_feature_version =
1523 			le32_to_cpu(cp_hdr->ucode_feature_version);
1524 		} else {
1525 			err = 0;
1526 			adev->gfx.mec2_fw = NULL;
1527 		}
1528 	} else {
1529 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1530 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1531 	}
1532 
1533 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1534 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1535 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1536 		info->fw = adev->gfx.mec_fw;
1537 		header = (const struct common_firmware_header *)info->fw->data;
1538 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1539 		adev->firmware.fw_size +=
1540 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1541 
1542 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1543 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1544 		info->fw = adev->gfx.mec_fw;
1545 		adev->firmware.fw_size +=
1546 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1547 
1548 		if (adev->gfx.mec2_fw) {
1549 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1550 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1551 			info->fw = adev->gfx.mec2_fw;
1552 			header = (const struct common_firmware_header *)info->fw->data;
1553 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1554 			adev->firmware.fw_size +=
1555 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1556 
1557 			/* TODO: Determine if MEC2 JT FW loading can be removed
1558 				 for all GFX V9 asic and above */
1559 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1560 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1561 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1562 				info->fw = adev->gfx.mec2_fw;
1563 				adev->firmware.fw_size +=
1564 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1565 					PAGE_SIZE);
1566 			}
1567 		}
1568 	}
1569 
1570 out:
1571 	gfx_v9_0_check_if_need_gfxoff(adev);
1572 	gfx_v9_0_check_fw_write_wait(adev);
1573 	if (err) {
1574 		dev_err(adev->dev,
1575 			"gfx9: Failed to load firmware \"%s\"\n",
1576 			fw_name);
1577 		release_firmware(adev->gfx.mec_fw);
1578 		adev->gfx.mec_fw = NULL;
1579 		release_firmware(adev->gfx.mec2_fw);
1580 		adev->gfx.mec2_fw = NULL;
1581 	}
1582 	return err;
1583 }
1584 
1585 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1586 {
1587 	const char *chip_name;
1588 	int r;
1589 
1590 	DRM_DEBUG("\n");
1591 
1592 	switch (adev->ip_versions[GC_HWIP][0]) {
1593 	case IP_VERSION(9, 0, 1):
1594 		chip_name = "vega10";
1595 		break;
1596 	case IP_VERSION(9, 2, 1):
1597 		chip_name = "vega12";
1598 		break;
1599 	case IP_VERSION(9, 4, 0):
1600 		chip_name = "vega20";
1601 		break;
1602 	case IP_VERSION(9, 2, 2):
1603 	case IP_VERSION(9, 1, 0):
1604 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1605 			chip_name = "raven2";
1606 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1607 			chip_name = "picasso";
1608 		else
1609 			chip_name = "raven";
1610 		break;
1611 	case IP_VERSION(9, 4, 1):
1612 		chip_name = "arcturus";
1613 		break;
1614 	case IP_VERSION(9, 3, 0):
1615 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1616 			chip_name = "renoir";
1617 		else
1618 			chip_name = "green_sardine";
1619 		break;
1620 	case IP_VERSION(9, 4, 2):
1621 		chip_name = "aldebaran";
1622 		break;
1623 	default:
1624 		BUG();
1625 	}
1626 
1627 	/* No CPG in Arcturus */
1628 	if (adev->gfx.num_gfx_rings) {
1629 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1630 		if (r)
1631 			return r;
1632 	}
1633 
1634 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1635 	if (r)
1636 		return r;
1637 
1638 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1639 	if (r)
1640 		return r;
1641 
1642 	return r;
1643 }
1644 
1645 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1646 {
1647 	u32 count = 0;
1648 	const struct cs_section_def *sect = NULL;
1649 	const struct cs_extent_def *ext = NULL;
1650 
1651 	/* begin clear state */
1652 	count += 2;
1653 	/* context control state */
1654 	count += 3;
1655 
1656 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1657 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1658 			if (sect->id == SECT_CONTEXT)
1659 				count += 2 + ext->reg_count;
1660 			else
1661 				return 0;
1662 		}
1663 	}
1664 
1665 	/* end clear state */
1666 	count += 2;
1667 	/* clear state */
1668 	count += 2;
1669 
1670 	return count;
1671 }
1672 
1673 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1674 				    volatile u32 *buffer)
1675 {
1676 	u32 count = 0, i;
1677 	const struct cs_section_def *sect = NULL;
1678 	const struct cs_extent_def *ext = NULL;
1679 
1680 	if (adev->gfx.rlc.cs_data == NULL)
1681 		return;
1682 	if (buffer == NULL)
1683 		return;
1684 
1685 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1686 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1687 
1688 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1689 	buffer[count++] = cpu_to_le32(0x80000000);
1690 	buffer[count++] = cpu_to_le32(0x80000000);
1691 
1692 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1693 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1694 			if (sect->id == SECT_CONTEXT) {
1695 				buffer[count++] =
1696 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1697 				buffer[count++] = cpu_to_le32(ext->reg_index -
1698 						PACKET3_SET_CONTEXT_REG_START);
1699 				for (i = 0; i < ext->reg_count; i++)
1700 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1701 			} else {
1702 				return;
1703 			}
1704 		}
1705 	}
1706 
1707 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1708 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1709 
1710 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1711 	buffer[count++] = cpu_to_le32(0);
1712 }
1713 
1714 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1715 {
1716 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1717 	uint32_t pg_always_on_cu_num = 2;
1718 	uint32_t always_on_cu_num;
1719 	uint32_t i, j, k;
1720 	uint32_t mask, cu_bitmap, counter;
1721 
1722 	if (adev->flags & AMD_IS_APU)
1723 		always_on_cu_num = 4;
1724 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1725 		always_on_cu_num = 8;
1726 	else
1727 		always_on_cu_num = 12;
1728 
1729 	mutex_lock(&adev->grbm_idx_mutex);
1730 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1731 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1732 			mask = 1;
1733 			cu_bitmap = 0;
1734 			counter = 0;
1735 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1736 
1737 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1738 				if (cu_info->bitmap[i][j] & mask) {
1739 					if (counter == pg_always_on_cu_num)
1740 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1741 					if (counter < always_on_cu_num)
1742 						cu_bitmap |= mask;
1743 					else
1744 						break;
1745 					counter++;
1746 				}
1747 				mask <<= 1;
1748 			}
1749 
1750 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1751 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1752 		}
1753 	}
1754 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1755 	mutex_unlock(&adev->grbm_idx_mutex);
1756 }
1757 
1758 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1759 {
1760 	uint32_t data;
1761 
1762 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1763 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1764 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1765 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1766 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1767 
1768 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1769 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1770 
1771 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1772 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1773 
1774 	mutex_lock(&adev->grbm_idx_mutex);
1775 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1776 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1777 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1778 
1779 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1780 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1781 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1782 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1783 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1784 
1785 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1786 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1787 	data &= 0x0000FFFF;
1788 	data |= 0x00C00000;
1789 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1790 
1791 	/*
1792 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1793 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1794 	 */
1795 
1796 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1797 	 * but used for RLC_LB_CNTL configuration */
1798 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1799 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1800 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1801 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1802 	mutex_unlock(&adev->grbm_idx_mutex);
1803 
1804 	gfx_v9_0_init_always_on_cu_mask(adev);
1805 }
1806 
1807 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1808 {
1809 	uint32_t data;
1810 
1811 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1812 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1813 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1814 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1815 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1816 
1817 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1818 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1819 
1820 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1821 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1822 
1823 	mutex_lock(&adev->grbm_idx_mutex);
1824 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1825 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1826 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1827 
1828 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1829 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1830 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1831 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1832 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1833 
1834 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1835 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1836 	data &= 0x0000FFFF;
1837 	data |= 0x00C00000;
1838 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1839 
1840 	/*
1841 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1842 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1843 	 */
1844 
1845 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1846 	 * but used for RLC_LB_CNTL configuration */
1847 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1848 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1849 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1850 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1851 	mutex_unlock(&adev->grbm_idx_mutex);
1852 
1853 	gfx_v9_0_init_always_on_cu_mask(adev);
1854 }
1855 
1856 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1857 {
1858 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1859 }
1860 
/*
 * Number of CP jump tables: 5 when a separate MEC2 firmware image is
 * supported, 4 otherwise.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1868 
1869 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1870 {
1871 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1872 
1873 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1874 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1875 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1876 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1877 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1878 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1879 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1880 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1881 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1882 }
1883 
1884 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1885 {
1886 	const struct cs_section_def *cs_data;
1887 	int r;
1888 
1889 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1890 
1891 	cs_data = adev->gfx.rlc.cs_data;
1892 
1893 	if (cs_data) {
1894 		/* init clear state block */
1895 		r = amdgpu_gfx_rlc_init_csb(adev);
1896 		if (r)
1897 			return r;
1898 	}
1899 
1900 	if (adev->flags & AMD_IS_APU) {
1901 		/* TODO: double check the cp_table_size for RV */
1902 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1903 		r = amdgpu_gfx_rlc_init_cpt(adev);
1904 		if (r)
1905 			return r;
1906 	}
1907 
1908 	switch (adev->ip_versions[GC_HWIP][0]) {
1909 	case IP_VERSION(9, 2, 2):
1910 	case IP_VERSION(9, 1, 0):
1911 		gfx_v9_0_init_lbpw(adev);
1912 		break;
1913 	case IP_VERSION(9, 4, 0):
1914 		gfx_v9_4_init_lbpw(adev);
1915 		break;
1916 	default:
1917 		break;
1918 	}
1919 
1920 	/* init spm vmid with 0xf */
1921 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1922 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1923 
1924 	return 0;
1925 }
1926 
1927 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1928 {
1929 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1930 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1931 }
1932 
1933 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1934 {
1935 	int r;
1936 	u32 *hpd;
1937 	const __le32 *fw_data;
1938 	unsigned fw_size;
1939 	u32 *fw;
1940 	size_t mec_hpd_size;
1941 
1942 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1943 
1944 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1945 
1946 	/* take ownership of the relevant compute queues */
1947 	amdgpu_gfx_compute_queue_acquire(adev);
1948 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1949 	if (mec_hpd_size) {
1950 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1951 					      AMDGPU_GEM_DOMAIN_VRAM,
1952 					      &adev->gfx.mec.hpd_eop_obj,
1953 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1954 					      (void **)&hpd);
1955 		if (r) {
1956 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1957 			gfx_v9_0_mec_fini(adev);
1958 			return r;
1959 		}
1960 
1961 		memset(hpd, 0, mec_hpd_size);
1962 
1963 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1964 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1965 	}
1966 
1967 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1968 
1969 	fw_data = (const __le32 *)
1970 		(adev->gfx.mec_fw->data +
1971 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1972 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1973 
1974 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1975 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1976 				      &adev->gfx.mec.mec_fw_obj,
1977 				      &adev->gfx.mec.mec_fw_gpu_addr,
1978 				      (void **)&fw);
1979 	if (r) {
1980 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1981 		gfx_v9_0_mec_fini(adev);
1982 		return r;
1983 	}
1984 
1985 	memcpy(fw, fw_data, fw_size);
1986 
1987 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1988 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1989 
1990 	return 0;
1991 }
1992 
1993 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1994 {
1995 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1996 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1997 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1998 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1999 		(SQ_IND_INDEX__FORCE_READ_MASK));
2000 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2001 }
2002 
2003 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2004 			   uint32_t wave, uint32_t thread,
2005 			   uint32_t regno, uint32_t num, uint32_t *out)
2006 {
2007 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2008 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2009 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2010 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2011 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2012 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2013 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2014 	while (num--)
2015 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2016 }
2017 
2018 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2019 {
2020 	/* type 1 wave data */
2021 	dst[(*no_fields)++] = 1;
2022 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2023 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2024 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2025 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2026 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2027 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2028 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2029 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2030 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2031 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2032 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2033 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2034 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2035 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2036 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2037 }
2038 
2039 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2040 				     uint32_t wave, uint32_t start,
2041 				     uint32_t size, uint32_t *dst)
2042 {
2043 	wave_read_regs(
2044 		adev, simd, wave, 0,
2045 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2046 }
2047 
2048 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2049 				     uint32_t wave, uint32_t thread,
2050 				     uint32_t start, uint32_t size,
2051 				     uint32_t *dst)
2052 {
2053 	wave_read_regs(
2054 		adev, simd, wave, thread,
2055 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2056 }
2057 
2058 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2059 				  u32 me, u32 pipe, u32 q, u32 vm)
2060 {
2061 	soc15_grbm_select(adev, me, pipe, q, vm);
2062 }
2063 
2064 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2065         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2066         .select_se_sh = &gfx_v9_0_select_se_sh,
2067         .read_wave_data = &gfx_v9_0_read_wave_data,
2068         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2069         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2070         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2071 };
2072 
2073 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
2074 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
2075 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2076 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2077 };
2078 
2079 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2080 	.ras_block = {
2081 		.hw_ops = &gfx_v9_0_ras_ops,
2082 	},
2083 };
2084 
2085 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2086 {
2087 	u32 gb_addr_config;
2088 	int err;
2089 
2090 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2091 
2092 	switch (adev->ip_versions[GC_HWIP][0]) {
2093 	case IP_VERSION(9, 0, 1):
2094 		adev->gfx.config.max_hw_contexts = 8;
2095 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2096 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2097 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2098 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2099 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2100 		break;
2101 	case IP_VERSION(9, 2, 1):
2102 		adev->gfx.config.max_hw_contexts = 8;
2103 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2104 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2105 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2106 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2107 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2108 		DRM_INFO("fix gfx.config for vega12\n");
2109 		break;
2110 	case IP_VERSION(9, 4, 0):
2111 		adev->gfx.ras = &gfx_v9_0_ras;
2112 		adev->gfx.config.max_hw_contexts = 8;
2113 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2114 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2115 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2116 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2117 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2118 		gb_addr_config &= ~0xf3e777ff;
2119 		gb_addr_config |= 0x22014042;
2120 		/* check vbios table if gpu info is not available */
2121 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2122 		if (err)
2123 			return err;
2124 		break;
2125 	case IP_VERSION(9, 2, 2):
2126 	case IP_VERSION(9, 1, 0):
2127 		adev->gfx.config.max_hw_contexts = 8;
2128 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2129 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2130 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2131 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2132 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2133 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2134 		else
2135 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2136 		break;
2137 	case IP_VERSION(9, 4, 1):
2138 		adev->gfx.ras = &gfx_v9_4_ras;
2139 		adev->gfx.config.max_hw_contexts = 8;
2140 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2141 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2142 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2143 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2144 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2145 		gb_addr_config &= ~0xf3e777ff;
2146 		gb_addr_config |= 0x22014042;
2147 		break;
2148 	case IP_VERSION(9, 3, 0):
2149 		adev->gfx.config.max_hw_contexts = 8;
2150 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2151 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2152 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2153 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2154 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2155 		gb_addr_config &= ~0xf3e777ff;
2156 		gb_addr_config |= 0x22010042;
2157 		break;
2158 	case IP_VERSION(9, 4, 2):
2159 		adev->gfx.ras = &gfx_v9_4_2_ras;
2160 		adev->gfx.config.max_hw_contexts = 8;
2161 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2162 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2163 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2164 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2165 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2166 		gb_addr_config &= ~0xf3e777ff;
2167 		gb_addr_config |= 0x22014042;
2168 		/* check vbios table if gpu info is not available */
2169 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2170 		if (err)
2171 			return err;
2172 		break;
2173 	default:
2174 		BUG();
2175 		break;
2176 	}
2177 
2178 	if (adev->gfx.ras) {
2179 		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2180 		if (err) {
2181 			DRM_ERROR("Failed to register gfx ras block!\n");
2182 			return err;
2183 		}
2184 
2185 		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2186 		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2187 		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2188 		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2189 
2190 		/* If not define special ras_late_init function, use gfx default ras_late_init */
2191 		if (!adev->gfx.ras->ras_block.ras_late_init)
2192 			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2193 
2194 		/* If not defined special ras_cb function, use default ras_cb */
2195 		if (!adev->gfx.ras->ras_block.ras_cb)
2196 			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2197 	}
2198 
2199 	adev->gfx.config.gb_addr_config = gb_addr_config;
2200 
2201 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2202 			REG_GET_FIELD(
2203 					adev->gfx.config.gb_addr_config,
2204 					GB_ADDR_CONFIG,
2205 					NUM_PIPES);
2206 
2207 	adev->gfx.config.max_tile_pipes =
2208 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2209 
2210 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2211 			REG_GET_FIELD(
2212 					adev->gfx.config.gb_addr_config,
2213 					GB_ADDR_CONFIG,
2214 					NUM_BANKS);
2215 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2216 			REG_GET_FIELD(
2217 					adev->gfx.config.gb_addr_config,
2218 					GB_ADDR_CONFIG,
2219 					MAX_COMPRESSED_FRAGS);
2220 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2221 			REG_GET_FIELD(
2222 					adev->gfx.config.gb_addr_config,
2223 					GB_ADDR_CONFIG,
2224 					NUM_RB_PER_SE);
2225 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2226 			REG_GET_FIELD(
2227 					adev->gfx.config.gb_addr_config,
2228 					GB_ADDR_CONFIG,
2229 					NUM_SHADER_ENGINES);
2230 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2231 			REG_GET_FIELD(
2232 					adev->gfx.config.gb_addr_config,
2233 					GB_ADDR_CONFIG,
2234 					PIPE_INTERLEAVE_SIZE));
2235 
2236 	return 0;
2237 }
2238 
2239 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2240 				      int mec, int pipe, int queue)
2241 {
2242 	unsigned irq_type;
2243 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2244 	unsigned int hw_prio;
2245 
2246 	ring = &adev->gfx.compute_ring[ring_id];
2247 
2248 	/* mec0 is me1 */
2249 	ring->me = mec + 1;
2250 	ring->pipe = pipe;
2251 	ring->queue = queue;
2252 
2253 	ring->ring_obj = NULL;
2254 	ring->use_doorbell = true;
2255 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2256 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2257 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2258 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2259 
2260 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2261 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2262 		+ ring->pipe;
2263 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2264 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2265 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2266 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2267 				hw_prio, NULL);
2268 }
2269 
2270 static int gfx_v9_0_sw_init(void *handle)
2271 {
2272 	int i, j, k, r, ring_id;
2273 	struct amdgpu_ring *ring;
2274 	struct amdgpu_kiq *kiq;
2275 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2276 
2277 	switch (adev->ip_versions[GC_HWIP][0]) {
2278 	case IP_VERSION(9, 0, 1):
2279 	case IP_VERSION(9, 2, 1):
2280 	case IP_VERSION(9, 4, 0):
2281 	case IP_VERSION(9, 2, 2):
2282 	case IP_VERSION(9, 1, 0):
2283 	case IP_VERSION(9, 4, 1):
2284 	case IP_VERSION(9, 3, 0):
2285 	case IP_VERSION(9, 4, 2):
2286 		adev->gfx.mec.num_mec = 2;
2287 		break;
2288 	default:
2289 		adev->gfx.mec.num_mec = 1;
2290 		break;
2291 	}
2292 
2293 	adev->gfx.mec.num_pipe_per_mec = 4;
2294 	adev->gfx.mec.num_queue_per_pipe = 8;
2295 
2296 	/* EOP Event */
2297 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2298 	if (r)
2299 		return r;
2300 
2301 	/* Privileged reg */
2302 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2303 			      &adev->gfx.priv_reg_irq);
2304 	if (r)
2305 		return r;
2306 
2307 	/* Privileged inst */
2308 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2309 			      &adev->gfx.priv_inst_irq);
2310 	if (r)
2311 		return r;
2312 
2313 	/* ECC error */
2314 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2315 			      &adev->gfx.cp_ecc_error_irq);
2316 	if (r)
2317 		return r;
2318 
2319 	/* FUE error */
2320 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2321 			      &adev->gfx.cp_ecc_error_irq);
2322 	if (r)
2323 		return r;
2324 
2325 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2326 
2327 	r = gfx_v9_0_init_microcode(adev);
2328 	if (r) {
2329 		DRM_ERROR("Failed to load gfx firmware!\n");
2330 		return r;
2331 	}
2332 
2333 	if (adev->gfx.rlc.funcs) {
2334 		if (adev->gfx.rlc.funcs->init) {
2335 			r = adev->gfx.rlc.funcs->init(adev);
2336 			if (r) {
2337 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2338 				return r;
2339 			}
2340 		}
2341 	}
2342 
2343 	r = gfx_v9_0_mec_init(adev);
2344 	if (r) {
2345 		DRM_ERROR("Failed to init MEC BOs!\n");
2346 		return r;
2347 	}
2348 
2349 	/* set up the gfx ring */
2350 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2351 		ring = &adev->gfx.gfx_ring[i];
2352 		ring->ring_obj = NULL;
2353 		if (!i)
2354 			sprintf(ring->name, "gfx");
2355 		else
2356 			sprintf(ring->name, "gfx_%d", i);
2357 		ring->use_doorbell = true;
2358 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2359 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2360 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2361 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2362 		if (r)
2363 			return r;
2364 	}
2365 
2366 	/* set up the compute queues - allocate horizontally across pipes */
2367 	ring_id = 0;
2368 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2369 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2370 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2371 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2372 					continue;
2373 
2374 				r = gfx_v9_0_compute_ring_init(adev,
2375 							       ring_id,
2376 							       i, k, j);
2377 				if (r)
2378 					return r;
2379 
2380 				ring_id++;
2381 			}
2382 		}
2383 	}
2384 
2385 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2386 	if (r) {
2387 		DRM_ERROR("Failed to init KIQ BOs!\n");
2388 		return r;
2389 	}
2390 
2391 	kiq = &adev->gfx.kiq;
2392 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2393 	if (r)
2394 		return r;
2395 
2396 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2397 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2398 	if (r)
2399 		return r;
2400 
2401 	adev->gfx.ce_ram_size = 0x8000;
2402 
2403 	r = gfx_v9_0_gpu_early_init(adev);
2404 	if (r)
2405 		return r;
2406 
2407 	return 0;
2408 }
2409 
2410 
2411 static int gfx_v9_0_sw_fini(void *handle)
2412 {
2413 	int i;
2414 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2415 
2416 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2417 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2418 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2419 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2420 
2421 	amdgpu_gfx_mqd_sw_fini(adev);
2422 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2423 	amdgpu_gfx_kiq_fini(adev);
2424 
2425 	gfx_v9_0_mec_fini(adev);
2426 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2427 				&adev->gfx.rlc.clear_state_gpu_addr,
2428 				(void **)&adev->gfx.rlc.cs_ptr);
2429 	if (adev->flags & AMD_IS_APU) {
2430 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2431 				&adev->gfx.rlc.cp_table_gpu_addr,
2432 				(void **)&adev->gfx.rlc.cp_table_ptr);
2433 	}
2434 	gfx_v9_0_free_microcode(adev);
2435 
2436 	return 0;
2437 }
2438 
2439 
/*
 * gfx_v9_0_tiling_mode_table_init - placeholder, currently does nothing
 *
 * @adev: amdgpu device (unused)
 */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO: not implemented yet */
}
2444 
2445 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2446 			   u32 instance)
2447 {
2448 	u32 data;
2449 
2450 	if (instance == 0xffffffff)
2451 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2452 	else
2453 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2454 
2455 	if (se_num == 0xffffffff)
2456 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2457 	else
2458 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2459 
2460 	if (sh_num == 0xffffffff)
2461 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2462 	else
2463 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2464 
2465 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2466 }
2467 
2468 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2469 {
2470 	u32 data, mask;
2471 
2472 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2473 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2474 
2475 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2476 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2477 
2478 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2479 					 adev->gfx.config.max_sh_per_se);
2480 
2481 	return (~data) & mask;
2482 }
2483 
2484 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2485 {
2486 	int i, j;
2487 	u32 data;
2488 	u32 active_rbs = 0;
2489 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2490 					adev->gfx.config.max_sh_per_se;
2491 
2492 	mutex_lock(&adev->grbm_idx_mutex);
2493 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2494 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2495 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2496 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2497 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2498 					       rb_bitmap_width_per_sh);
2499 		}
2500 	}
2501 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2502 	mutex_unlock(&adev->grbm_idx_mutex);
2503 
2504 	adev->gfx.config.backend_enable_mask = active_rbs;
2505 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2506 }
2507 
2508 #define DEFAULT_SH_MEM_BASES	(0x6000)
2509 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2510 {
2511 	int i;
2512 	uint32_t sh_mem_config;
2513 	uint32_t sh_mem_bases;
2514 
2515 	/*
2516 	 * Configure apertures:
2517 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2518 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2519 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2520 	 */
2521 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2522 
2523 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2524 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2525 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2526 
2527 	mutex_lock(&adev->srbm_mutex);
2528 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2529 		soc15_grbm_select(adev, 0, 0, 0, i);
2530 		/* CP and shaders */
2531 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2532 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2533 	}
2534 	soc15_grbm_select(adev, 0, 0, 0, 0);
2535 	mutex_unlock(&adev->srbm_mutex);
2536 
2537 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2538 	   access. These should be enabled by FW for target VMIDs. */
2539 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2540 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2541 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2542 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2543 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2544 	}
2545 }
2546 
2547 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2548 {
2549 	int vmid;
2550 
2551 	/*
2552 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2553 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2554 	 * the driver can enable them for graphics. VMID0 should maintain
2555 	 * access so that HWS firmware can save/restore entries.
2556 	 */
2557 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2558 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2559 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2560 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2561 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2562 	}
2563 }
2564 
2565 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2566 {
2567 	uint32_t tmp;
2568 
2569 	switch (adev->ip_versions[GC_HWIP][0]) {
2570 	case IP_VERSION(9, 4, 1):
2571 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2572 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2573 					DISABLE_BARRIER_WAITCNT, 1);
2574 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2575 		break;
2576 	default:
2577 		break;
2578 	}
2579 }
2580 
2581 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2582 {
2583 	u32 tmp;
2584 	int i;
2585 
2586 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2587 
2588 	gfx_v9_0_tiling_mode_table_init(adev);
2589 
2590 	if (adev->gfx.num_gfx_rings)
2591 		gfx_v9_0_setup_rb(adev);
2592 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2593 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2594 
2595 	/* XXX SH_MEM regs */
2596 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2597 	mutex_lock(&adev->srbm_mutex);
2598 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2599 		soc15_grbm_select(adev, 0, 0, 0, i);
2600 		/* CP and shaders */
2601 		if (i == 0) {
2602 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2603 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2604 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2605 					    !!adev->gmc.noretry);
2606 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2607 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2608 		} else {
2609 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2610 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2611 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2612 					    !!adev->gmc.noretry);
2613 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2614 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2615 				(adev->gmc.private_aperture_start >> 48));
2616 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2617 				(adev->gmc.shared_aperture_start >> 48));
2618 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2619 		}
2620 	}
2621 	soc15_grbm_select(adev, 0, 0, 0, 0);
2622 
2623 	mutex_unlock(&adev->srbm_mutex);
2624 
2625 	gfx_v9_0_init_compute_vmid(adev);
2626 	gfx_v9_0_init_gds_vmid(adev);
2627 	gfx_v9_0_init_sq_config(adev);
2628 }
2629 
2630 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2631 {
2632 	u32 i, j, k;
2633 	u32 mask;
2634 
2635 	mutex_lock(&adev->grbm_idx_mutex);
2636 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2637 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2638 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2639 			for (k = 0; k < adev->usec_timeout; k++) {
2640 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2641 					break;
2642 				udelay(1);
2643 			}
2644 			if (k == adev->usec_timeout) {
2645 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2646 						      0xffffffff, 0xffffffff);
2647 				mutex_unlock(&adev->grbm_idx_mutex);
2648 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2649 					 i, j);
2650 				return;
2651 			}
2652 		}
2653 	}
2654 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2655 	mutex_unlock(&adev->grbm_idx_mutex);
2656 
2657 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2658 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2659 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2660 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2661 	for (k = 0; k < adev->usec_timeout; k++) {
2662 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2663 			break;
2664 		udelay(1);
2665 	}
2666 }
2667 
2668 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2669 					       bool enable)
2670 {
2671 	u32 tmp;
2672 
2673 	/* These interrupts should be enabled to drive DS clock */
2674 
2675 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2676 
2677 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2678 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2679 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2680 	if(adev->gfx.num_gfx_rings)
2681 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2682 
2683 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2684 }
2685 
2686 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2687 {
2688 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2689 	/* csib */
2690 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2691 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2692 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2693 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2694 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2695 			adev->gfx.rlc.clear_state_size);
2696 }
2697 
/*
 * gfx_v9_1_parse_ind_reg_list - scan the indirect part of a register list
 *
 * @register_list_format: list words
 * @indirect_offset: index of the first word to scan
 * @list_size: number of words in @register_list_format
 * @unique_indirect_regs: table of distinct indirect register values, filled
 *                        in here; an entry equal to 0 is a free slot
 * @unique_indirect_reg_count: number of entries in @unique_indirect_regs
 * @indirect_start_offsets: receives the start index of every section
 * @indirect_start_offsets_count: in/out, number of entries stored in
 *                                @indirect_start_offsets
 * @max_start_offsets_count: capacity of @indirect_start_offsets
 *
 * The scanned area is treated as a run of sections that each end with a
 * 0xFFFFFFFF word.  Inside a section entries are three words long and the
 * third word is the value recorded in @unique_indirect_regs.
 *
 * If more sections are found than @indirect_start_offsets can hold, a
 * warning is raised and parsing stops, so nothing is stored past the end
 * of that array.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* never store past the end of indirect_start_offsets[] */
		if (WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count))
			break;

		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words of the entry */
			indirect_offset += 2;

			/* look for the matching index, or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			/* no match and no free slot left */
			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2734 
2735 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2736 {
2737 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2738 	int unique_indirect_reg_count = 0;
2739 
2740 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2741 	int indirect_start_offsets_count = 0;
2742 
2743 	int list_size = 0;
2744 	int i = 0, j = 0;
2745 	u32 tmp = 0;
2746 
2747 	u32 *register_list_format =
2748 		kmemdup(adev->gfx.rlc.register_list_format,
2749 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2750 	if (!register_list_format)
2751 		return -ENOMEM;
2752 
2753 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2754 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2755 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2756 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2757 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2758 				    unique_indirect_regs,
2759 				    unique_indirect_reg_count,
2760 				    indirect_start_offsets,
2761 				    &indirect_start_offsets_count,
2762 				    ARRAY_SIZE(indirect_start_offsets));
2763 
2764 	/* enable auto inc in case it is disabled */
2765 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2766 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2767 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2768 
2769 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2770 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2771 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2772 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2773 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2774 			adev->gfx.rlc.register_restore[i]);
2775 
2776 	/* load indirect register */
2777 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2778 		adev->gfx.rlc.reg_list_format_start);
2779 
2780 	/* direct register portion */
2781 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2782 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2783 			register_list_format[i]);
2784 
2785 	/* indirect register portion */
2786 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2787 		if (register_list_format[i] == 0xFFFFFFFF) {
2788 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2789 			continue;
2790 		}
2791 
2792 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2793 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2794 
2795 		for (j = 0; j < unique_indirect_reg_count; j++) {
2796 			if (register_list_format[i] == unique_indirect_regs[j]) {
2797 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2798 				break;
2799 			}
2800 		}
2801 
2802 		BUG_ON(j >= unique_indirect_reg_count);
2803 
2804 		i++;
2805 	}
2806 
2807 	/* set save/restore list size */
2808 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2809 	list_size = list_size >> 1;
2810 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2811 		adev->gfx.rlc.reg_restore_list_size);
2812 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2813 
2814 	/* write the starting offsets to RLC scratch ram */
2815 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2816 		adev->gfx.rlc.starting_offsets_start);
2817 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2818 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2819 		       indirect_start_offsets[i]);
2820 
2821 	/* load unique indirect regs*/
2822 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2823 		if (unique_indirect_regs[i] != 0) {
2824 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2825 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2826 			       unique_indirect_regs[i] & 0x3FFFF);
2827 
2828 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2829 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2830 			       unique_indirect_regs[i] >> 20);
2831 		}
2832 	}
2833 
2834 	kfree(register_list_format);
2835 	return 0;
2836 }
2837 
2838 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2839 {
2840 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2841 }
2842 
2843 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2844 					     bool enable)
2845 {
2846 	uint32_t data = 0;
2847 	uint32_t default_data = 0;
2848 
2849 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2850 	if (enable) {
2851 		/* enable GFXIP control over CGPG */
2852 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2853 		if(default_data != data)
2854 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2855 
2856 		/* update status */
2857 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2858 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2859 		if(default_data != data)
2860 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2861 	} else {
2862 		/* restore GFXIP control over GCPG */
2863 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2864 		if(default_data != data)
2865 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2866 	}
2867 }
2868 
2869 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2870 {
2871 	uint32_t data = 0;
2872 
2873 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2874 			      AMD_PG_SUPPORT_GFX_SMG |
2875 			      AMD_PG_SUPPORT_GFX_DMG)) {
2876 		/* init IDLE_POLL_COUNT = 60 */
2877 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2878 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2879 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2880 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2881 
2882 		/* init RLC PG Delay */
2883 		data = 0;
2884 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2885 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2886 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2887 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2888 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2889 
2890 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2891 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2892 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2893 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2894 
2895 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2896 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2897 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2898 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2899 
2900 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2901 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2902 
2903 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2904 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2905 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2906 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2907 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2908 	}
2909 }
2910 
2911 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2912 						bool enable)
2913 {
2914 	uint32_t data = 0;
2915 	uint32_t default_data = 0;
2916 
2917 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2918 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2919 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2920 			     enable ? 1 : 0);
2921 	if (default_data != data)
2922 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2923 }
2924 
2925 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2926 						bool enable)
2927 {
2928 	uint32_t data = 0;
2929 	uint32_t default_data = 0;
2930 
2931 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2932 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2933 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2934 			     enable ? 1 : 0);
2935 	if(default_data != data)
2936 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2937 }
2938 
2939 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2940 					bool enable)
2941 {
2942 	uint32_t data = 0;
2943 	uint32_t default_data = 0;
2944 
2945 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947 			     CP_PG_DISABLE,
2948 			     enable ? 0 : 1);
2949 	if(default_data != data)
2950 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2951 }
2952 
2953 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2954 						bool enable)
2955 {
2956 	uint32_t data, default_data;
2957 
2958 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2959 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2960 			     GFX_POWER_GATING_ENABLE,
2961 			     enable ? 1 : 0);
2962 	if(default_data != data)
2963 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2964 }
2965 
2966 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2967 						bool enable)
2968 {
2969 	uint32_t data, default_data;
2970 
2971 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2972 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2973 			     GFX_PIPELINE_PG_ENABLE,
2974 			     enable ? 1 : 0);
2975 	if(default_data != data)
2976 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2977 
2978 	if (!enable)
2979 		/* read any GFX register to wake up GFX */
2980 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2981 }
2982 
2983 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2984 						       bool enable)
2985 {
2986 	uint32_t data, default_data;
2987 
2988 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2989 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2990 			     STATIC_PER_CU_PG_ENABLE,
2991 			     enable ? 1 : 0);
2992 	if(default_data != data)
2993 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2994 }
2995 
2996 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2997 						bool enable)
2998 {
2999 	uint32_t data, default_data;
3000 
3001 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3002 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3003 			     DYN_PER_CU_PG_ENABLE,
3004 			     enable ? 1 : 0);
3005 	if(default_data != data)
3006 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3007 }
3008 
3009 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3010 {
3011 	gfx_v9_0_init_csb(adev);
3012 
3013 	/*
3014 	 * Rlc save restore list is workable since v2_1.
3015 	 * And it's needed by gfxoff feature.
3016 	 */
3017 	if (adev->gfx.rlc.is_rlc_v2_1) {
3018 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3019 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3020 			gfx_v9_1_init_rlc_save_restore_list(adev);
3021 		gfx_v9_0_enable_save_restore_machine(adev);
3022 	}
3023 
3024 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3025 			      AMD_PG_SUPPORT_GFX_SMG |
3026 			      AMD_PG_SUPPORT_GFX_DMG |
3027 			      AMD_PG_SUPPORT_CP |
3028 			      AMD_PG_SUPPORT_GDS |
3029 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3030 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3031 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3032 		gfx_v9_0_init_gfx_power_gating(adev);
3033 	}
3034 }
3035 
3036 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3037 {
3038 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3039 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3040 	gfx_v9_0_wait_for_rlc_serdes(adev);
3041 }
3042 
3043 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3044 {
3045 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3046 	udelay(50);
3047 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3048 	udelay(50);
3049 }
3050 
3051 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3052 {
3053 #ifdef AMDGPU_RLC_DEBUG_RETRY
3054 	u32 rlc_ucode_ver;
3055 #endif
3056 
3057 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3058 	udelay(50);
3059 
3060 	/* carrizo do enable cp interrupt after cp inited */
3061 	if (!(adev->flags & AMD_IS_APU)) {
3062 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3063 		udelay(50);
3064 	}
3065 
3066 #ifdef AMDGPU_RLC_DEBUG_RETRY
3067 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3068 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3069 	if(rlc_ucode_ver == 0x108) {
3070 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3071 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3072 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3073 		 * default is 0x9C4 to create a 100us interval */
3074 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3075 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3076 		 * to disable the page fault retry interrupts, default is
3077 		 * 0x100 (256) */
3078 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3079 	}
3080 #endif
3081 }
3082 
3083 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3084 {
3085 	const struct rlc_firmware_header_v2_0 *hdr;
3086 	const __le32 *fw_data;
3087 	unsigned i, fw_size;
3088 
3089 	if (!adev->gfx.rlc_fw)
3090 		return -EINVAL;
3091 
3092 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3093 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3094 
3095 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3096 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3097 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3098 
3099 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3100 			RLCG_UCODE_LOADING_START_ADDRESS);
3101 	for (i = 0; i < fw_size; i++)
3102 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3103 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3104 
3105 	return 0;
3106 }
3107 
3108 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3109 {
3110 	int r;
3111 
3112 	if (amdgpu_sriov_vf(adev)) {
3113 		gfx_v9_0_init_csb(adev);
3114 		return 0;
3115 	}
3116 
3117 	adev->gfx.rlc.funcs->stop(adev);
3118 
3119 	/* disable CG */
3120 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3121 
3122 	gfx_v9_0_init_pg(adev);
3123 
3124 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3125 		/* legacy rlc firmware loading */
3126 		r = gfx_v9_0_rlc_load_microcode(adev);
3127 		if (r)
3128 			return r;
3129 	}
3130 
3131 	switch (adev->ip_versions[GC_HWIP][0]) {
3132 	case IP_VERSION(9, 2, 2):
3133 	case IP_VERSION(9, 1, 0):
3134 		if (amdgpu_lbpw == 0)
3135 			gfx_v9_0_enable_lbpw(adev, false);
3136 		else
3137 			gfx_v9_0_enable_lbpw(adev, true);
3138 		break;
3139 	case IP_VERSION(9, 4, 0):
3140 		if (amdgpu_lbpw > 0)
3141 			gfx_v9_0_enable_lbpw(adev, true);
3142 		else
3143 			gfx_v9_0_enable_lbpw(adev, false);
3144 		break;
3145 	default:
3146 		break;
3147 	}
3148 
3149 	adev->gfx.rlc.funcs->start(adev);
3150 
3151 	return 0;
3152 }
3153 
3154 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3155 {
3156 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3157 
3158 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3159 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3160 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3161 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3162 	udelay(50);
3163 }
3164 
3165 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3166 {
3167 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3168 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3169 	const struct gfx_firmware_header_v1_0 *me_hdr;
3170 	const __le32 *fw_data;
3171 	unsigned i, fw_size;
3172 
3173 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3174 		return -EINVAL;
3175 
3176 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3177 		adev->gfx.pfp_fw->data;
3178 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3179 		adev->gfx.ce_fw->data;
3180 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3181 		adev->gfx.me_fw->data;
3182 
3183 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3184 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3185 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3186 
3187 	gfx_v9_0_cp_gfx_enable(adev, false);
3188 
3189 	/* PFP */
3190 	fw_data = (const __le32 *)
3191 		(adev->gfx.pfp_fw->data +
3192 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3193 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3194 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3195 	for (i = 0; i < fw_size; i++)
3196 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3197 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3198 
3199 	/* CE */
3200 	fw_data = (const __le32 *)
3201 		(adev->gfx.ce_fw->data +
3202 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3203 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3204 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3205 	for (i = 0; i < fw_size; i++)
3206 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3207 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3208 
3209 	/* ME */
3210 	fw_data = (const __le32 *)
3211 		(adev->gfx.me_fw->data +
3212 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3213 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3214 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3215 	for (i = 0; i < fw_size; i++)
3216 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3217 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3218 
3219 	return 0;
3220 }
3221 
3222 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3223 {
3224 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3225 	const struct cs_section_def *sect = NULL;
3226 	const struct cs_extent_def *ext = NULL;
3227 	int r, i, tmp;
3228 
3229 	/* init the CP */
3230 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3231 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3232 
3233 	gfx_v9_0_cp_gfx_enable(adev, true);
3234 
3235 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3236 	if (r) {
3237 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3238 		return r;
3239 	}
3240 
3241 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3242 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3243 
3244 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3245 	amdgpu_ring_write(ring, 0x80000000);
3246 	amdgpu_ring_write(ring, 0x80000000);
3247 
3248 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3249 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3250 			if (sect->id == SECT_CONTEXT) {
3251 				amdgpu_ring_write(ring,
3252 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3253 					       ext->reg_count));
3254 				amdgpu_ring_write(ring,
3255 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3256 				for (i = 0; i < ext->reg_count; i++)
3257 					amdgpu_ring_write(ring, ext->extent[i]);
3258 			}
3259 		}
3260 	}
3261 
3262 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3263 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3264 
3265 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3266 	amdgpu_ring_write(ring, 0);
3267 
3268 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3269 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3270 	amdgpu_ring_write(ring, 0x8000);
3271 	amdgpu_ring_write(ring, 0x8000);
3272 
3273 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3274 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3275 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3276 	amdgpu_ring_write(ring, tmp);
3277 	amdgpu_ring_write(ring, 0);
3278 
3279 	amdgpu_ring_commit(ring);
3280 
3281 	return 0;
3282 }
3283 
3284 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3285 {
3286 	struct amdgpu_ring *ring;
3287 	u32 tmp;
3288 	u32 rb_bufsz;
3289 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3290 
3291 	/* Set the write pointer delay */
3292 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3293 
3294 	/* set the RB to use vmid 0 */
3295 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3296 
3297 	/* Set ring buffer size */
3298 	ring = &adev->gfx.gfx_ring[0];
3299 	rb_bufsz = order_base_2(ring->ring_size / 8);
3300 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3301 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3302 #ifdef __BIG_ENDIAN
3303 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3304 #endif
3305 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3306 
3307 	/* Initialize the ring buffer's write pointers */
3308 	ring->wptr = 0;
3309 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3310 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3311 
3312 	/* set the wb address wether it's enabled or not */
3313 	rptr_addr = ring->rptr_gpu_addr;
3314 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3315 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3316 
3317 	wptr_gpu_addr = ring->wptr_gpu_addr;
3318 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3319 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3320 
3321 	mdelay(1);
3322 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3323 
3324 	rb_addr = ring->gpu_addr >> 8;
3325 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3326 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3327 
3328 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3329 	if (ring->use_doorbell) {
3330 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3331 				    DOORBELL_OFFSET, ring->doorbell_index);
3332 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3333 				    DOORBELL_EN, 1);
3334 	} else {
3335 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3336 	}
3337 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3338 
3339 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3340 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3341 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3342 
3343 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3344 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3345 
3346 
3347 	/* start the ring */
3348 	gfx_v9_0_cp_gfx_start(adev);
3349 	ring->sched.ready = true;
3350 
3351 	return 0;
3352 }
3353 
3354 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3355 {
3356 	if (enable) {
3357 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3358 	} else {
3359 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3360 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3361 		adev->gfx.kiq.ring.sched.ready = false;
3362 	}
3363 	udelay(50);
3364 }
3365 
3366 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3367 {
3368 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3369 	const __le32 *fw_data;
3370 	unsigned i;
3371 	u32 tmp;
3372 
3373 	if (!adev->gfx.mec_fw)
3374 		return -EINVAL;
3375 
3376 	gfx_v9_0_cp_compute_enable(adev, false);
3377 
3378 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3379 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3380 
3381 	fw_data = (const __le32 *)
3382 		(adev->gfx.mec_fw->data +
3383 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3384 	tmp = 0;
3385 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3386 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3387 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3388 
3389 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3390 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3391 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3392 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3393 
3394 	/* MEC1 */
3395 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3396 			 mec_hdr->jt_offset);
3397 	for (i = 0; i < mec_hdr->jt_size; i++)
3398 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3399 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3400 
3401 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3402 			adev->gfx.mec_fw_version);
3403 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3404 
3405 	return 0;
3406 }
3407 
3408 /* KIQ functions */
3409 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3410 {
3411 	uint32_t tmp;
3412 	struct amdgpu_device *adev = ring->adev;
3413 
3414 	/* tell RLC which is KIQ queue */
3415 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3416 	tmp &= 0xffffff00;
3417 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3418 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3419 	tmp |= 0x80;
3420 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3421 }
3422 
3423 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3424 {
3425 	struct amdgpu_device *adev = ring->adev;
3426 
3427 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3428 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3429 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3430 			mqd->cp_hqd_queue_priority =
3431 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3432 		}
3433 	}
3434 }
3435 
3436 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3437 {
3438 	struct amdgpu_device *adev = ring->adev;
3439 	struct v9_mqd *mqd = ring->mqd_ptr;
3440 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3441 	uint32_t tmp;
3442 
3443 	mqd->header = 0xC0310800;
3444 	mqd->compute_pipelinestat_enable = 0x00000001;
3445 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3446 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3447 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3448 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3449 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3450 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3451 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3452 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3453 	mqd->compute_misc_reserved = 0x00000003;
3454 
3455 	mqd->dynamic_cu_mask_addr_lo =
3456 		lower_32_bits(ring->mqd_gpu_addr
3457 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3458 	mqd->dynamic_cu_mask_addr_hi =
3459 		upper_32_bits(ring->mqd_gpu_addr
3460 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3461 
3462 	eop_base_addr = ring->eop_gpu_addr >> 8;
3463 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3464 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3465 
3466 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3467 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3468 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3469 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3470 
3471 	mqd->cp_hqd_eop_control = tmp;
3472 
3473 	/* enable doorbell? */
3474 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3475 
3476 	if (ring->use_doorbell) {
3477 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3478 				    DOORBELL_OFFSET, ring->doorbell_index);
3479 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3480 				    DOORBELL_EN, 1);
3481 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3482 				    DOORBELL_SOURCE, 0);
3483 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3484 				    DOORBELL_HIT, 0);
3485 	} else {
3486 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3487 					 DOORBELL_EN, 0);
3488 	}
3489 
3490 	mqd->cp_hqd_pq_doorbell_control = tmp;
3491 
3492 	/* disable the queue if it's active */
3493 	ring->wptr = 0;
3494 	mqd->cp_hqd_dequeue_request = 0;
3495 	mqd->cp_hqd_pq_rptr = 0;
3496 	mqd->cp_hqd_pq_wptr_lo = 0;
3497 	mqd->cp_hqd_pq_wptr_hi = 0;
3498 
3499 	/* set the pointer to the MQD */
3500 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3501 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3502 
3503 	/* set MQD vmid to 0 */
3504 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3505 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3506 	mqd->cp_mqd_control = tmp;
3507 
3508 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3509 	hqd_gpu_addr = ring->gpu_addr >> 8;
3510 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3511 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3512 
3513 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3514 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3515 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3516 			    (order_base_2(ring->ring_size / 4) - 1));
3517 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3518 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3519 #ifdef __BIG_ENDIAN
3520 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3521 #endif
3522 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3523 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3524 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3525 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3526 	mqd->cp_hqd_pq_control = tmp;
3527 
3528 	/* set the wb address whether it's enabled or not */
3529 	wb_gpu_addr = ring->rptr_gpu_addr;
3530 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3531 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3532 		upper_32_bits(wb_gpu_addr) & 0xffff;
3533 
3534 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3535 	wb_gpu_addr = ring->wptr_gpu_addr;
3536 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3537 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3538 
3539 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3540 	ring->wptr = 0;
3541 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3542 
3543 	/* set the vmid for the queue */
3544 	mqd->cp_hqd_vmid = 0;
3545 
3546 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3547 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3548 	mqd->cp_hqd_persistent_state = tmp;
3549 
3550 	/* set MIN_IB_AVAIL_SIZE */
3551 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3552 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3553 	mqd->cp_hqd_ib_control = tmp;
3554 
3555 	/* set static priority for a queue/ring */
3556 	gfx_v9_0_mqd_set_priority(ring, mqd);
3557 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3558 
3559 	/* map_queues packet doesn't need activate the queue,
3560 	 * so only kiq need set this field.
3561 	 */
3562 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3563 		mqd->cp_hqd_active = 1;
3564 
3565 	return 0;
3566 }
3567 
3568 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3569 {
3570 	struct amdgpu_device *adev = ring->adev;
3571 	struct v9_mqd *mqd = ring->mqd_ptr;
3572 	int j;
3573 
3574 	/* disable wptr polling */
3575 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3576 
3577 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3578 	       mqd->cp_hqd_eop_base_addr_lo);
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3580 	       mqd->cp_hqd_eop_base_addr_hi);
3581 
3582 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3583 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3584 	       mqd->cp_hqd_eop_control);
3585 
3586 	/* enable doorbell? */
3587 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3588 	       mqd->cp_hqd_pq_doorbell_control);
3589 
3590 	/* disable the queue if it's active */
3591 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3592 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3593 		for (j = 0; j < adev->usec_timeout; j++) {
3594 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3595 				break;
3596 			udelay(1);
3597 		}
3598 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3599 		       mqd->cp_hqd_dequeue_request);
3600 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3601 		       mqd->cp_hqd_pq_rptr);
3602 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3603 		       mqd->cp_hqd_pq_wptr_lo);
3604 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3605 		       mqd->cp_hqd_pq_wptr_hi);
3606 	}
3607 
3608 	/* set the pointer to the MQD */
3609 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3610 	       mqd->cp_mqd_base_addr_lo);
3611 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3612 	       mqd->cp_mqd_base_addr_hi);
3613 
3614 	/* set MQD vmid to 0 */
3615 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3616 	       mqd->cp_mqd_control);
3617 
3618 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3620 	       mqd->cp_hqd_pq_base_lo);
3621 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3622 	       mqd->cp_hqd_pq_base_hi);
3623 
3624 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3625 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3626 	       mqd->cp_hqd_pq_control);
3627 
3628 	/* set the wb address whether it's enabled or not */
3629 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3630 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3631 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3632 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3633 
3634 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3635 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3636 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3637 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3638 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3639 
3640 	/* enable the doorbell if requested */
3641 	if (ring->use_doorbell) {
3642 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3643 					(adev->doorbell_index.kiq * 2) << 2);
3644 		/* If GC has entered CGPG, ringing doorbell > first page
3645 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3646 		 * workaround this issue. And this change has to align with firmware
3647 		 * update.
3648 		 */
3649 		if (check_if_enlarge_doorbell_range(adev))
3650 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3651 					(adev->doorbell.size - 4));
3652 		else
3653 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3654 					(adev->doorbell_index.userqueue_end * 2) << 2);
3655 	}
3656 
3657 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3658 	       mqd->cp_hqd_pq_doorbell_control);
3659 
3660 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3661 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3662 	       mqd->cp_hqd_pq_wptr_lo);
3663 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3664 	       mqd->cp_hqd_pq_wptr_hi);
3665 
3666 	/* set the vmid for the queue */
3667 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3668 
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3670 	       mqd->cp_hqd_persistent_state);
3671 
3672 	/* activate the queue */
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3674 	       mqd->cp_hqd_active);
3675 
3676 	if (ring->use_doorbell)
3677 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3678 
3679 	return 0;
3680 }
3681 
3682 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3683 {
3684 	struct amdgpu_device *adev = ring->adev;
3685 	int j;
3686 
3687 	/* disable the queue if it's active */
3688 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3689 
3690 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3691 
3692 		for (j = 0; j < adev->usec_timeout; j++) {
3693 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3694 				break;
3695 			udelay(1);
3696 		}
3697 
3698 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3699 			DRM_DEBUG("KIQ dequeue request failed.\n");
3700 
3701 			/* Manual disable if dequeue request times out */
3702 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3703 		}
3704 
3705 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3706 		      0);
3707 	}
3708 
3709 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3710 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3711 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3712 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3713 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3714 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3715 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3716 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3717 
3718 	return 0;
3719 }
3720 
3721 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3722 {
3723 	struct amdgpu_device *adev = ring->adev;
3724 	struct v9_mqd *mqd = ring->mqd_ptr;
3725 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3726 	struct v9_mqd *tmp_mqd;
3727 
3728 	gfx_v9_0_kiq_setting(ring);
3729 
3730 	/* GPU could be in bad state during probe, driver trigger the reset
3731 	 * after load the SMU, in this case , the mqd is not be initialized.
3732 	 * driver need to re-init the mqd.
3733 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3734 	 */
3735 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3736 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3737 		/* for GPU_RESET case , reset MQD to a clean status */
3738 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3739 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3740 
3741 		/* reset ring buffer */
3742 		ring->wptr = 0;
3743 		amdgpu_ring_clear_ring(ring);
3744 
3745 		mutex_lock(&adev->srbm_mutex);
3746 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3747 		gfx_v9_0_kiq_init_register(ring);
3748 		soc15_grbm_select(adev, 0, 0, 0, 0);
3749 		mutex_unlock(&adev->srbm_mutex);
3750 	} else {
3751 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3752 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3753 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3754 		mutex_lock(&adev->srbm_mutex);
3755 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3756 		gfx_v9_0_mqd_init(ring);
3757 		gfx_v9_0_kiq_init_register(ring);
3758 		soc15_grbm_select(adev, 0, 0, 0, 0);
3759 		mutex_unlock(&adev->srbm_mutex);
3760 
3761 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3762 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3763 	}
3764 
3765 	return 0;
3766 }
3767 
3768 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3769 {
3770 	struct amdgpu_device *adev = ring->adev;
3771 	struct v9_mqd *mqd = ring->mqd_ptr;
3772 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3773 	struct v9_mqd *tmp_mqd;
3774 
3775 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3776 	 * is not be initialized before
3777 	 */
3778 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3779 
3780 	if (!tmp_mqd->cp_hqd_pq_control ||
3781 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3782 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3783 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3784 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3785 		mutex_lock(&adev->srbm_mutex);
3786 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3787 		gfx_v9_0_mqd_init(ring);
3788 		soc15_grbm_select(adev, 0, 0, 0, 0);
3789 		mutex_unlock(&adev->srbm_mutex);
3790 
3791 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3792 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3793 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3794 		/* reset MQD to a clean status */
3795 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3796 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3797 
3798 		/* reset ring buffer */
3799 		ring->wptr = 0;
3800 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3801 		amdgpu_ring_clear_ring(ring);
3802 	} else {
3803 		amdgpu_ring_clear_ring(ring);
3804 	}
3805 
3806 	return 0;
3807 }
3808 
3809 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3810 {
3811 	struct amdgpu_ring *ring;
3812 	int r;
3813 
3814 	ring = &adev->gfx.kiq.ring;
3815 
3816 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3817 	if (unlikely(r != 0))
3818 		return r;
3819 
3820 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3821 	if (unlikely(r != 0))
3822 		return r;
3823 
3824 	gfx_v9_0_kiq_init_queue(ring);
3825 	amdgpu_bo_kunmap(ring->mqd_obj);
3826 	ring->mqd_ptr = NULL;
3827 	amdgpu_bo_unreserve(ring->mqd_obj);
3828 	ring->sched.ready = true;
3829 	return 0;
3830 }
3831 
3832 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3833 {
3834 	struct amdgpu_ring *ring = NULL;
3835 	int r = 0, i;
3836 
3837 	gfx_v9_0_cp_compute_enable(adev, true);
3838 
3839 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3840 		ring = &adev->gfx.compute_ring[i];
3841 
3842 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3843 		if (unlikely(r != 0))
3844 			goto done;
3845 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3846 		if (!r) {
3847 			r = gfx_v9_0_kcq_init_queue(ring);
3848 			amdgpu_bo_kunmap(ring->mqd_obj);
3849 			ring->mqd_ptr = NULL;
3850 		}
3851 		amdgpu_bo_unreserve(ring->mqd_obj);
3852 		if (r)
3853 			goto done;
3854 	}
3855 
3856 	r = amdgpu_gfx_enable_kcq(adev);
3857 done:
3858 	return r;
3859 }
3860 
3861 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3862 {
3863 	int r, i;
3864 	struct amdgpu_ring *ring;
3865 
3866 	if (!(adev->flags & AMD_IS_APU))
3867 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3868 
3869 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3870 		if (adev->gfx.num_gfx_rings) {
3871 			/* legacy firmware loading */
3872 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3873 			if (r)
3874 				return r;
3875 		}
3876 
3877 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3878 		if (r)
3879 			return r;
3880 	}
3881 
3882 	r = gfx_v9_0_kiq_resume(adev);
3883 	if (r)
3884 		return r;
3885 
3886 	if (adev->gfx.num_gfx_rings) {
3887 		r = gfx_v9_0_cp_gfx_resume(adev);
3888 		if (r)
3889 			return r;
3890 	}
3891 
3892 	r = gfx_v9_0_kcq_resume(adev);
3893 	if (r)
3894 		return r;
3895 
3896 	if (adev->gfx.num_gfx_rings) {
3897 		ring = &adev->gfx.gfx_ring[0];
3898 		r = amdgpu_ring_test_helper(ring);
3899 		if (r)
3900 			return r;
3901 	}
3902 
3903 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3904 		ring = &adev->gfx.compute_ring[i];
3905 		amdgpu_ring_test_helper(ring);
3906 	}
3907 
3908 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3909 
3910 	return 0;
3911 }
3912 
3913 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3914 {
3915 	u32 tmp;
3916 
3917 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3918 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3919 		return;
3920 
3921 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3922 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3923 				adev->df.hash_status.hash_64k);
3924 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3925 				adev->df.hash_status.hash_2m);
3926 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3927 				adev->df.hash_status.hash_1g);
3928 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3929 }
3930 
3931 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3932 {
3933 	if (adev->gfx.num_gfx_rings)
3934 		gfx_v9_0_cp_gfx_enable(adev, enable);
3935 	gfx_v9_0_cp_compute_enable(adev, enable);
3936 }
3937 
3938 static int gfx_v9_0_hw_init(void *handle)
3939 {
3940 	int r;
3941 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3942 
3943 	if (!amdgpu_sriov_vf(adev))
3944 		gfx_v9_0_init_golden_registers(adev);
3945 
3946 	gfx_v9_0_constants_init(adev);
3947 
3948 	gfx_v9_0_init_tcp_config(adev);
3949 
3950 	r = adev->gfx.rlc.funcs->resume(adev);
3951 	if (r)
3952 		return r;
3953 
3954 	r = gfx_v9_0_cp_resume(adev);
3955 	if (r)
3956 		return r;
3957 
3958 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3959 		gfx_v9_4_2_set_power_brake_sequence(adev);
3960 
3961 	return r;
3962 }
3963 
3964 static int gfx_v9_0_hw_fini(void *handle)
3965 {
3966 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3967 
3968 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3969 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3970 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3971 
3972 	/* DF freeze and kcq disable will fail */
3973 	if (!amdgpu_ras_intr_triggered())
3974 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3975 		amdgpu_gfx_disable_kcq(adev);
3976 
3977 	if (amdgpu_sriov_vf(adev)) {
3978 		gfx_v9_0_cp_gfx_enable(adev, false);
3979 		/* must disable polling for SRIOV when hw finished, otherwise
3980 		 * CPC engine may still keep fetching WB address which is already
3981 		 * invalid after sw finished and trigger DMAR reading error in
3982 		 * hypervisor side.
3983 		 */
3984 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3985 		return 0;
3986 	}
3987 
3988 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3989 	 * otherwise KIQ is hanging when binding back
3990 	 */
3991 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3992 		mutex_lock(&adev->srbm_mutex);
3993 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3994 				adev->gfx.kiq.ring.pipe,
3995 				adev->gfx.kiq.ring.queue, 0);
3996 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3997 		soc15_grbm_select(adev, 0, 0, 0, 0);
3998 		mutex_unlock(&adev->srbm_mutex);
3999 	}
4000 
4001 	gfx_v9_0_cp_enable(adev, false);
4002 
4003 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4004 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4005 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4006 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4007 		return 0;
4008 	}
4009 
4010 	adev->gfx.rlc.funcs->stop(adev);
4011 	return 0;
4012 }
4013 
/* Suspend callback: identical to the hardware teardown path. */
static int gfx_v9_0_suspend(void *handle)
{
	int ret = gfx_v9_0_hw_fini(handle);

	return ret;
}
4018 
/* Resume callback: identical to the hardware init path. */
static int gfx_v9_0_resume(void *handle)
{
	int ret = gfx_v9_0_hw_init(handle);

	return ret;
}
4023 
4024 static bool gfx_v9_0_is_idle(void *handle)
4025 {
4026 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4027 
4028 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4029 				GRBM_STATUS, GUI_ACTIVE))
4030 		return false;
4031 	else
4032 		return true;
4033 }
4034 
4035 static int gfx_v9_0_wait_for_idle(void *handle)
4036 {
4037 	unsigned i;
4038 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4039 
4040 	for (i = 0; i < adev->usec_timeout; i++) {
4041 		if (gfx_v9_0_is_idle(handle))
4042 			return 0;
4043 		udelay(1);
4044 	}
4045 	return -ETIMEDOUT;
4046 }
4047 
4048 static int gfx_v9_0_soft_reset(void *handle)
4049 {
4050 	u32 grbm_soft_reset = 0;
4051 	u32 tmp;
4052 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4053 
4054 	/* GRBM_STATUS */
4055 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4056 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4057 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4058 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4059 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4060 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4061 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4062 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4063 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4064 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4065 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4066 	}
4067 
4068 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4069 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4070 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4071 	}
4072 
4073 	/* GRBM_STATUS2 */
4074 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4075 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4076 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4077 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4078 
4079 
4080 	if (grbm_soft_reset) {
4081 		/* stop the rlc */
4082 		adev->gfx.rlc.funcs->stop(adev);
4083 
4084 		if (adev->gfx.num_gfx_rings)
4085 			/* Disable GFX parsing/prefetching */
4086 			gfx_v9_0_cp_gfx_enable(adev, false);
4087 
4088 		/* Disable MEC parsing/prefetching */
4089 		gfx_v9_0_cp_compute_enable(adev, false);
4090 
4091 		if (grbm_soft_reset) {
4092 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4093 			tmp |= grbm_soft_reset;
4094 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4095 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4096 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4097 
4098 			udelay(50);
4099 
4100 			tmp &= ~grbm_soft_reset;
4101 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4102 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4103 		}
4104 
4105 		/* Wait a little for things to settle down */
4106 		udelay(50);
4107 	}
4108 	return 0;
4109 }
4110 
4111 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4112 {
4113 	signed long r, cnt = 0;
4114 	unsigned long flags;
4115 	uint32_t seq, reg_val_offs = 0;
4116 	uint64_t value = 0;
4117 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4118 	struct amdgpu_ring *ring = &kiq->ring;
4119 
4120 	BUG_ON(!ring->funcs->emit_rreg);
4121 
4122 	spin_lock_irqsave(&kiq->ring_lock, flags);
4123 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4124 		pr_err("critical bug! too many kiq readers\n");
4125 		goto failed_unlock;
4126 	}
4127 	amdgpu_ring_alloc(ring, 32);
4128 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4129 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4130 				(5 << 8) |	/* dst: memory */
4131 				(1 << 16) |	/* count sel */
4132 				(1 << 20));	/* write confirm */
4133 	amdgpu_ring_write(ring, 0);
4134 	amdgpu_ring_write(ring, 0);
4135 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4136 				reg_val_offs * 4));
4137 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4138 				reg_val_offs * 4));
4139 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4140 	if (r)
4141 		goto failed_undo;
4142 
4143 	amdgpu_ring_commit(ring);
4144 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4145 
4146 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4147 
4148 	/* don't wait anymore for gpu reset case because this way may
4149 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4150 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4151 	 * never return if we keep waiting in virt_kiq_rreg, which cause
4152 	 * gpu_recover() hang there.
4153 	 *
4154 	 * also don't wait anymore for IRQ context
4155 	 * */
4156 	if (r < 1 && (amdgpu_in_reset(adev)))
4157 		goto failed_kiq_read;
4158 
4159 	might_sleep();
4160 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4161 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4162 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4163 	}
4164 
4165 	if (cnt > MAX_KIQ_REG_TRY)
4166 		goto failed_kiq_read;
4167 
4168 	mb();
4169 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4170 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4171 	amdgpu_device_wb_free(adev, reg_val_offs);
4172 	return value;
4173 
4174 failed_undo:
4175 	amdgpu_ring_undo(ring);
4176 failed_unlock:
4177 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4178 failed_kiq_read:
4179 	if (reg_val_offs)
4180 		amdgpu_device_wb_free(adev, reg_val_offs);
4181 	pr_err("failed to read gpu clock\n");
4182 	return ~0;
4183 }
4184 
4185 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 {
4187 	uint64_t clock, clock_lo, clock_hi, hi_check;
4188 
4189 	switch (adev->ip_versions[GC_HWIP][0]) {
4190 	case IP_VERSION(9, 3, 0):
4191 		preempt_disable();
4192 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4193 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4194 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4195 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4196 		 * roughly every 42 seconds.
4197 		 */
4198 		if (hi_check != clock_hi) {
4199 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4200 			clock_hi = hi_check;
4201 		}
4202 		preempt_enable();
4203 		clock = clock_lo | (clock_hi << 32ULL);
4204 		break;
4205 	default:
4206 		amdgpu_gfx_off_ctrl(adev, false);
4207 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4208 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4209 			clock = gfx_v9_0_kiq_read_clock(adev);
4210 		} else {
4211 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4212 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4213 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4214 		}
4215 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4216 		amdgpu_gfx_off_ctrl(adev, true);
4217 		break;
4218 	}
4219 	return clock;
4220 }
4221 
4222 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4223 					  uint32_t vmid,
4224 					  uint32_t gds_base, uint32_t gds_size,
4225 					  uint32_t gws_base, uint32_t gws_size,
4226 					  uint32_t oa_base, uint32_t oa_size)
4227 {
4228 	struct amdgpu_device *adev = ring->adev;
4229 
4230 	/* GDS Base */
4231 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4232 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4233 				   gds_base);
4234 
4235 	/* GDS Size */
4236 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4237 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4238 				   gds_size);
4239 
4240 	/* GWS */
4241 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4242 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4243 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4244 
4245 	/* OA */
4246 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4247 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4248 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4249 }
4250 
/*
 * Raw shader dwords for the VGPR pass of gfx_v9_0_do_edc_gpr_workarounds().
 * That function copies this array into its indirect buffer and dispatches
 * it on every GC IP version other than 9.4.1.  The contents are an opaque
 * binary: do not reformat or edit individual words.
 */
4251 static const u32 vgpr_init_compute_shader[] =
4252 {
4253 	0xb07c0000, 0xbe8000ff,
4254 	0x000000f8, 0xbf110800,
4255 	0x7e000280, 0x7e020280,
4256 	0x7e040280, 0x7e060280,
4257 	0x7e080280, 0x7e0a0280,
4258 	0x7e0c0280, 0x7e0e0280,
4259 	0x80808800, 0xbe803200,
4260 	0xbf84fff5, 0xbf9c0000,
4261 	0xd28c0001, 0x0001007f,
4262 	0xd28d0001, 0x0002027e,
4263 	0x10020288, 0xb8810904,
4264 	0xb7814000, 0xd1196a01,
4265 	0x00000301, 0xbe800087,
4266 	0xbefc00c1, 0xd89c4000,
4267 	0x00020201, 0xd89cc080,
4268 	0x00040401, 0x320202ff,
4269 	0x00000800, 0x80808100,
4270 	0xbf84fff8, 0x7e020280,
4271 	0xbf810000, 0x00000000,
4272 };
4273 
/*
 * Raw shader dwords for the SGPR passes of gfx_v9_0_do_edc_gpr_workarounds().
 * That function copies this array into its indirect buffer once and points
 * both the SGPR1 and the SGPR2 dispatch at it.  The contents are an opaque
 * binary: do not reformat or edit individual words.
 */
4274 static const u32 sgpr_init_compute_shader[] =
4275 {
4276 	0xb07c0000, 0xbe8000ff,
4277 	0x0000005f, 0xbee50080,
4278 	0xbe812c65, 0xbe822c65,
4279 	0xbe832c65, 0xbe842c65,
4280 	0xbe852c65, 0xb77c0005,
4281 	0x80808500, 0xbf84fff8,
4282 	0xbe800080, 0xbf810000,
4283 };
4284 
/*
 * Raw shader dwords for the VGPR pass of gfx_v9_0_do_edc_gpr_workarounds()
 * when the GC IP version is 9.4.1; that function selects this array instead
 * of vgpr_init_compute_shader in that case.  The contents are an opaque
 * binary: do not reformat or edit individual words.
 */
4285 static const u32 vgpr_init_compute_shader_arcturus[] = {
4286 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4287 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4288 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4289 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4290 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4291 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4292 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4293 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4294 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4295 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4296 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4297 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4298 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4299 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4300 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4301 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4302 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4303 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4304 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4305 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4306 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4307 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4308 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4309 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4310 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4311 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4312 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4313 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4314 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4315 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4316 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4317 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4318 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4319 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4320 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4321 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4322 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4323 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4324 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4325 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4326 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4327 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4328 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4329 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4330 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4331 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4332 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4333 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4334 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4335 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4336 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4337 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4338 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4339 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4340 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4341 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4342 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4343 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4344 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4345 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4346 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4347 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4348 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4349 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4350 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4351 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4352 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4353 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4354 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4355 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4356 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4357 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4358 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4359 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4360 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4361 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4362 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4363 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4364 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4365 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4366 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4367 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4368 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4369 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4370 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4371 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4372 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4373 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4374 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4375 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4376 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4377 	0xbf84fff8, 0xbf810000,
4378 };
4379 
4380 /* When the register arrays below are changed, please update gpr_reg_size
4381   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds
4382   so that all gfx9 ASICs remain covered. */
4383 static const struct soc15_reg_entry vgpr_init_regs[] = {
4384    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4385    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4386    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4387    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4388    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4389    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4390    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4391    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4392    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4393    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4394    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4395    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4396    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4397    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4398 };
4399 
4400 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4401    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4402    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4403    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4404    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4405    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4406    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4407    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4408    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4409    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4410    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4411    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4412    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4413    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4414    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4415 };
4416 
4417 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4431    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4432 };
4433 
4434 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4449 };
4450 
4451 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4452    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4453    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4454    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4455    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4456    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4457    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4458    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4459    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4460    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4461    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4462    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4463    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4464    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4465    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4466    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4467    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4468    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4469    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4470    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4471    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4472    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4473    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4474    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4475    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4476    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4477    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4478    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4479    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4480    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4481    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4482    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4483    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4484    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4485 };
4486 
4487 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4488 {
4489 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4490 	int i, r;
4491 
4492 	/* only support when RAS is enabled */
4493 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4494 		return 0;
4495 
4496 	r = amdgpu_ring_alloc(ring, 7);
4497 	if (r) {
4498 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4499 			ring->name, r);
4500 		return r;
4501 	}
4502 
4503 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4504 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4505 
4506 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4507 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4508 				PACKET3_DMA_DATA_DST_SEL(1) |
4509 				PACKET3_DMA_DATA_SRC_SEL(2) |
4510 				PACKET3_DMA_DATA_ENGINE(0)));
4511 	amdgpu_ring_write(ring, 0);
4512 	amdgpu_ring_write(ring, 0);
4513 	amdgpu_ring_write(ring, 0);
4514 	amdgpu_ring_write(ring, 0);
4515 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4516 				adev->gds.gds_size);
4517 
4518 	amdgpu_ring_commit(ring);
4519 
4520 	for (i = 0; i < adev->usec_timeout; i++) {
4521 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4522 			break;
4523 		udelay(1);
4524 	}
4525 
4526 	if (i >= adev->usec_timeout)
4527 		r = -ETIMEDOUT;
4528 
4529 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4530 
4531 	return r;
4532 }
4533 
4534 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4535 {
4536 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4537 	struct amdgpu_ib ib;
4538 	struct dma_fence *f = NULL;
4539 	int r, i;
4540 	unsigned total_size, vgpr_offset, sgpr_offset;
4541 	u64 gpu_addr;
4542 
4543 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4544 						adev->gfx.config.max_cu_per_sh *
4545 						adev->gfx.config.max_sh_per_se;
4546 	int sgpr_work_group_size = 5;
4547 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4548 	int vgpr_init_shader_size;
4549 	const u32 *vgpr_init_shader_ptr;
4550 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4551 
4552 	/* only support when RAS is enabled */
4553 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4554 		return 0;
4555 
4556 	/* bail if the compute ring is not ready */
4557 	if (!ring->sched.ready)
4558 		return 0;
4559 
4560 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4561 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4562 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4563 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4564 	} else {
4565 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4566 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4567 		vgpr_init_regs_ptr = vgpr_init_regs;
4568 	}
4569 
4570 	total_size =
4571 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4572 	total_size +=
4573 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4574 	total_size +=
4575 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4576 	total_size = ALIGN(total_size, 256);
4577 	vgpr_offset = total_size;
4578 	total_size += ALIGN(vgpr_init_shader_size, 256);
4579 	sgpr_offset = total_size;
4580 	total_size += sizeof(sgpr_init_compute_shader);
4581 
4582 	/* allocate an indirect buffer to put the commands in */
4583 	memset(&ib, 0, sizeof(ib));
4584 	r = amdgpu_ib_get(adev, NULL, total_size,
4585 					AMDGPU_IB_POOL_DIRECT, &ib);
4586 	if (r) {
4587 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4588 		return r;
4589 	}
4590 
4591 	/* load the compute shaders */
4592 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4593 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4594 
4595 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4596 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4597 
4598 	/* init the ib length to 0 */
4599 	ib.length_dw = 0;
4600 
4601 	/* VGPR */
4602 	/* write the register state for the compute dispatch */
4603 	for (i = 0; i < gpr_reg_size; i++) {
4604 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4605 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4606 								- PACKET3_SET_SH_REG_START;
4607 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4608 	}
4609 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4610 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4611 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4612 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4613 							- PACKET3_SET_SH_REG_START;
4614 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4615 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4616 
4617 	/* write dispatch packet */
4618 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4619 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4620 	ib.ptr[ib.length_dw++] = 1; /* y */
4621 	ib.ptr[ib.length_dw++] = 1; /* z */
4622 	ib.ptr[ib.length_dw++] =
4623 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4624 
4625 	/* write CS partial flush packet */
4626 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4627 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4628 
4629 	/* SGPR1 */
4630 	/* write the register state for the compute dispatch */
4631 	for (i = 0; i < gpr_reg_size; i++) {
4632 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4633 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4634 								- PACKET3_SET_SH_REG_START;
4635 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4636 	}
4637 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4638 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4639 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4640 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4641 							- PACKET3_SET_SH_REG_START;
4642 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4643 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4644 
4645 	/* write dispatch packet */
4646 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4647 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4648 	ib.ptr[ib.length_dw++] = 1; /* y */
4649 	ib.ptr[ib.length_dw++] = 1; /* z */
4650 	ib.ptr[ib.length_dw++] =
4651 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4652 
4653 	/* write CS partial flush packet */
4654 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4655 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4656 
4657 	/* SGPR2 */
4658 	/* write the register state for the compute dispatch */
4659 	for (i = 0; i < gpr_reg_size; i++) {
4660 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4661 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4662 								- PACKET3_SET_SH_REG_START;
4663 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4664 	}
4665 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4666 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4667 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4668 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4669 							- PACKET3_SET_SH_REG_START;
4670 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4671 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4672 
4673 	/* write dispatch packet */
4674 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4675 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4676 	ib.ptr[ib.length_dw++] = 1; /* y */
4677 	ib.ptr[ib.length_dw++] = 1; /* z */
4678 	ib.ptr[ib.length_dw++] =
4679 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4680 
4681 	/* write CS partial flush packet */
4682 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4683 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4684 
4685 	/* shedule the ib on the ring */
4686 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4687 	if (r) {
4688 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4689 		goto fail;
4690 	}
4691 
4692 	/* wait for the GPU to finish processing the IB */
4693 	r = dma_fence_wait(f, false);
4694 	if (r) {
4695 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4696 		goto fail;
4697 	}
4698 
4699 fail:
4700 	amdgpu_ib_free(adev, &ib, NULL);
4701 	dma_fence_put(f);
4702 
4703 	return r;
4704 }
4705 
4706 static int gfx_v9_0_early_init(void *handle)
4707 {
4708 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4709 
4710 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4711 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4712 		adev->gfx.num_gfx_rings = 0;
4713 	else
4714 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4715 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4716 					  AMDGPU_MAX_COMPUTE_RINGS);
4717 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4718 	gfx_v9_0_set_ring_funcs(adev);
4719 	gfx_v9_0_set_irq_funcs(adev);
4720 	gfx_v9_0_set_gds_init(adev);
4721 	gfx_v9_0_set_rlc_funcs(adev);
4722 
4723 	/* init rlcg reg access ctrl */
4724 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4725 
4726 	return 0;
4727 }
4728 
4729 static int gfx_v9_0_ecc_late_init(void *handle)
4730 {
4731 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4732 	int r;
4733 
4734 	/*
4735 	 * Temp workaround to fix the issue that CP firmware fails to
4736 	 * update read pointer when CPDMA is writing clearing operation
4737 	 * to GDS in suspend/resume sequence on several cards. So just
4738 	 * limit this operation in cold boot sequence.
4739 	 */
4740 	if ((!adev->in_suspend) &&
4741 	    (adev->gds.gds_size)) {
4742 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4743 		if (r)
4744 			return r;
4745 	}
4746 
4747 	/* requires IBs so do in late init after IB pool is initialized */
4748 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4749 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4750 	else
4751 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4752 
4753 	if (r)
4754 		return r;
4755 
4756 	if (adev->gfx.ras &&
4757 	    adev->gfx.ras->enable_watchdog_timer)
4758 		adev->gfx.ras->enable_watchdog_timer(adev);
4759 
4760 	return 0;
4761 }
4762 
4763 static int gfx_v9_0_late_init(void *handle)
4764 {
4765 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4766 	int r;
4767 
4768 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4769 	if (r)
4770 		return r;
4771 
4772 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4773 	if (r)
4774 		return r;
4775 
4776 	r = gfx_v9_0_ecc_late_init(handle);
4777 	if (r)
4778 		return r;
4779 
4780 	return 0;
4781 }
4782 
4783 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4784 {
4785 	uint32_t rlc_setting;
4786 
4787 	/* if RLC is not enabled, do nothing */
4788 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4789 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4790 		return false;
4791 
4792 	return true;
4793 }
4794 
4795 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4796 {
4797 	uint32_t data;
4798 	unsigned i;
4799 
4800 	data = RLC_SAFE_MODE__CMD_MASK;
4801 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4802 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4803 
4804 	/* wait for RLC_SAFE_MODE */
4805 	for (i = 0; i < adev->usec_timeout; i++) {
4806 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4807 			break;
4808 		udelay(1);
4809 	}
4810 }
4811 
4812 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4813 {
4814 	uint32_t data;
4815 
4816 	data = RLC_SAFE_MODE__CMD_MASK;
4817 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4818 }
4819 
4820 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4821 						bool enable)
4822 {
4823 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4824 
4825 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4826 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4827 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4828 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4829 	} else {
4830 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4831 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4832 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4833 	}
4834 
4835 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4836 }
4837 
4838 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4839 						bool enable)
4840 {
4841 	/* TODO: double check if we need to perform under safe mode */
4842 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4843 
4844 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4845 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4846 	else
4847 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4848 
4849 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4850 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4851 	else
4852 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4853 
4854 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4855 }
4856 
4857 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4858 						      bool enable)
4859 {
4860 	uint32_t data, def;
4861 
4862 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4863 
4864 	/* It is disabled by HW by default */
4865 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4866 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4867 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4868 
4869 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4870 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4871 
4872 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4873 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4874 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4875 
4876 		/* only for Vega10 & Raven1 */
4877 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4878 
4879 		if (def != data)
4880 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4881 
4882 		/* MGLS is a global flag to control all MGLS in GFX */
4883 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4884 			/* 2 - RLC memory Light sleep */
4885 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4886 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4887 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4888 				if (def != data)
4889 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4890 			}
4891 			/* 3 - CP memory Light sleep */
4892 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4893 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4894 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4895 				if (def != data)
4896 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4897 			}
4898 		}
4899 	} else {
4900 		/* 1 - MGCG_OVERRIDE */
4901 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4902 
4903 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4904 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4905 
4906 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4907 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4908 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4909 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4910 
4911 		if (def != data)
4912 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4913 
4914 		/* 2 - disable MGLS in RLC */
4915 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4916 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4917 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4918 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4919 		}
4920 
4921 		/* 3 - disable MGLS in CP */
4922 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4923 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4924 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4925 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4926 		}
4927 	}
4928 
4929 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4930 }
4931 
4932 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4933 					   bool enable)
4934 {
4935 	uint32_t data, def;
4936 
4937 	if (!adev->gfx.num_gfx_rings)
4938 		return;
4939 
4940 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4941 
4942 	/* Enable 3D CGCG/CGLS */
4943 	if (enable) {
4944 		/* write cmd to clear cgcg/cgls ov */
4945 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4946 		/* unset CGCG override */
4947 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4948 		/* update CGCG and CGLS override bits */
4949 		if (def != data)
4950 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4951 
4952 		/* enable 3Dcgcg FSM(0x0000363f) */
4953 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4954 
4955 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4956 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4957 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4958 		else
4959 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4960 
4961 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4962 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4963 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4964 		if (def != data)
4965 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4966 
4967 		/* set IDLE_POLL_COUNT(0x00900100) */
4968 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4969 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4970 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4971 		if (def != data)
4972 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4973 	} else {
4974 		/* Disable CGCG/CGLS */
4975 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4976 		/* disable cgcg, cgls should be disabled */
4977 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4978 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4979 		/* disable cgcg and cgls in FSM */
4980 		if (def != data)
4981 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4982 	}
4983 
4984 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4985 }
4986 
4987 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4988 						      bool enable)
4989 {
4990 	uint32_t def, data;
4991 
4992 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4993 
4994 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4995 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4996 		/* unset CGCG override */
4997 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4998 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4999 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5000 		else
5001 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5002 		/* update CGCG and CGLS override bits */
5003 		if (def != data)
5004 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5005 
5006 		/* enable cgcg FSM(0x0000363F) */
5007 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5008 
5009 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5010 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5011 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5012 		else
5013 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5014 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5015 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5016 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5017 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5018 		if (def != data)
5019 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5020 
5021 		/* set IDLE_POLL_COUNT(0x00900100) */
5022 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5023 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5024 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5025 		if (def != data)
5026 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5027 	} else {
5028 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5029 		/* reset CGCG/CGLS bits */
5030 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5031 		/* disable cgcg and cgls in FSM */
5032 		if (def != data)
5033 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5034 	}
5035 
5036 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5037 }
5038 
5039 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5040 					    bool enable)
5041 {
5042 	if (enable) {
5043 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5044 		 * ===  MGCG + MGLS ===
5045 		 */
5046 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5047 		/* ===  CGCG /CGLS for GFX 3D Only === */
5048 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5049 		/* ===  CGCG + CGLS === */
5050 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5051 	} else {
5052 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5053 		 * ===  CGCG + CGLS ===
5054 		 */
5055 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5056 		/* ===  CGCG /CGLS for GFX 3D Only === */
5057 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5058 		/* ===  MGCG + MGLS === */
5059 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5060 	}
5061 	return 0;
5062 }
5063 
5064 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5065 {
5066 	u32 reg, data;
5067 
5068 	amdgpu_gfx_off_ctrl(adev, false);
5069 
5070 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5071 	if (amdgpu_sriov_is_pp_one_vf(adev))
5072 		data = RREG32_NO_KIQ(reg);
5073 	else
5074 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5075 
5076 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5077 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5078 
5079 	if (amdgpu_sriov_is_pp_one_vf(adev))
5080 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5081 	else
5082 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5083 
5084 	amdgpu_gfx_off_ctrl(adev, true);
5085 }
5086 
5087 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5088 					uint32_t offset,
5089 					struct soc15_reg_rlcg *entries, int arr_size)
5090 {
5091 	int i;
5092 	uint32_t reg;
5093 
5094 	if (!entries)
5095 		return false;
5096 
5097 	for (i = 0; i < arr_size; i++) {
5098 		const struct soc15_reg_rlcg *entry;
5099 
5100 		entry = &entries[i];
5101 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5102 		if (offset == reg)
5103 			return true;
5104 	}
5105 
5106 	return false;
5107 }
5108 
5109 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5110 {
5111 	return gfx_v9_0_check_rlcg_range(adev, offset,
5112 					(void *)rlcg_access_gc_9_0,
5113 					ARRAY_SIZE(rlcg_access_gc_9_0));
5114 }
5115 
5116 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5117 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5118 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5119 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5120 	.init = gfx_v9_0_rlc_init,
5121 	.get_csb_size = gfx_v9_0_get_csb_size,
5122 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5123 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5124 	.resume = gfx_v9_0_rlc_resume,
5125 	.stop = gfx_v9_0_rlc_stop,
5126 	.reset = gfx_v9_0_rlc_reset,
5127 	.start = gfx_v9_0_rlc_start,
5128 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5129 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5130 };
5131 
5132 static int gfx_v9_0_set_powergating_state(void *handle,
5133 					  enum amd_powergating_state state)
5134 {
5135 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5136 	bool enable = (state == AMD_PG_STATE_GATE);
5137 
5138 	switch (adev->ip_versions[GC_HWIP][0]) {
5139 	case IP_VERSION(9, 2, 2):
5140 	case IP_VERSION(9, 1, 0):
5141 	case IP_VERSION(9, 3, 0):
5142 		if (!enable)
5143 			amdgpu_gfx_off_ctrl(adev, false);
5144 
5145 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5146 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5147 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5148 		} else {
5149 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5150 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5151 		}
5152 
5153 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5154 			gfx_v9_0_enable_cp_power_gating(adev, true);
5155 		else
5156 			gfx_v9_0_enable_cp_power_gating(adev, false);
5157 
5158 		/* update gfx cgpg state */
5159 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5160 
5161 		/* update mgcg state */
5162 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5163 
5164 		if (enable)
5165 			amdgpu_gfx_off_ctrl(adev, true);
5166 		break;
5167 	case IP_VERSION(9, 2, 1):
5168 		amdgpu_gfx_off_ctrl(adev, enable);
5169 		break;
5170 	default:
5171 		break;
5172 	}
5173 
5174 	return 0;
5175 }
5176 
5177 static int gfx_v9_0_set_clockgating_state(void *handle,
5178 					  enum amd_clockgating_state state)
5179 {
5180 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5181 
5182 	if (amdgpu_sriov_vf(adev))
5183 		return 0;
5184 
5185 	switch (adev->ip_versions[GC_HWIP][0]) {
5186 	case IP_VERSION(9, 0, 1):
5187 	case IP_VERSION(9, 2, 1):
5188 	case IP_VERSION(9, 4, 0):
5189 	case IP_VERSION(9, 2, 2):
5190 	case IP_VERSION(9, 1, 0):
5191 	case IP_VERSION(9, 4, 1):
5192 	case IP_VERSION(9, 3, 0):
5193 	case IP_VERSION(9, 4, 2):
5194 		gfx_v9_0_update_gfx_clock_gating(adev,
5195 						 state == AMD_CG_STATE_GATE);
5196 		break;
5197 	default:
5198 		break;
5199 	}
5200 	return 0;
5201 }
5202 
5203 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5204 {
5205 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5206 	int data;
5207 
5208 	if (amdgpu_sriov_vf(adev))
5209 		*flags = 0;
5210 
5211 	/* AMD_CG_SUPPORT_GFX_MGCG */
5212 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5213 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5214 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5215 
5216 	/* AMD_CG_SUPPORT_GFX_CGCG */
5217 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5218 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5219 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5220 
5221 	/* AMD_CG_SUPPORT_GFX_CGLS */
5222 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5223 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5224 
5225 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5226 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5227 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5228 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5229 
5230 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5231 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5232 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5233 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5234 
5235 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5236 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5237 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5238 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5239 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5240 
5241 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5242 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5243 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5244 	}
5245 }
5246 
5247 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5248 {
5249 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5250 }
5251 
5252 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5253 {
5254 	struct amdgpu_device *adev = ring->adev;
5255 	u64 wptr;
5256 
5257 	/* XXX check if swapping is necessary on BE */
5258 	if (ring->use_doorbell) {
5259 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5260 	} else {
5261 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5262 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5263 	}
5264 
5265 	return wptr;
5266 }
5267 
5268 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5269 {
5270 	struct amdgpu_device *adev = ring->adev;
5271 
5272 	if (ring->use_doorbell) {
5273 		/* XXX check if swapping is necessary on BE */
5274 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5275 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5276 	} else {
5277 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5278 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5279 	}
5280 }
5281 
5282 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5283 {
5284 	struct amdgpu_device *adev = ring->adev;
5285 	u32 ref_and_mask, reg_mem_engine;
5286 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5287 
5288 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5289 		switch (ring->me) {
5290 		case 1:
5291 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5292 			break;
5293 		case 2:
5294 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5295 			break;
5296 		default:
5297 			return;
5298 		}
5299 		reg_mem_engine = 0;
5300 	} else {
5301 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5302 		reg_mem_engine = 1; /* pfp */
5303 	}
5304 
5305 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5306 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5307 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5308 			      ref_and_mask, ref_and_mask, 0x20);
5309 }
5310 
5311 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5312 					struct amdgpu_job *job,
5313 					struct amdgpu_ib *ib,
5314 					uint32_t flags)
5315 {
5316 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5317 	u32 header, control = 0;
5318 
5319 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5320 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5321 	else
5322 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5323 
5324 	control |= ib->length_dw | (vmid << 24);
5325 
5326 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5327 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5328 
5329 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5330 			gfx_v9_0_ring_emit_de_meta(ring);
5331 	}
5332 
5333 	amdgpu_ring_write(ring, header);
5334 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5335 	amdgpu_ring_write(ring,
5336 #ifdef __BIG_ENDIAN
5337 		(2 << 0) |
5338 #endif
5339 		lower_32_bits(ib->gpu_addr));
5340 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5341 	amdgpu_ring_write(ring, control);
5342 }
5343 
5344 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5345 					  struct amdgpu_job *job,
5346 					  struct amdgpu_ib *ib,
5347 					  uint32_t flags)
5348 {
5349 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5350 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5351 
5352 	/* Currently, there is a high possibility to get wave ID mismatch
5353 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5354 	 * different wave IDs than the GDS expects. This situation happens
5355 	 * randomly when at least 5 compute pipes use GDS ordered append.
5356 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5357 	 * Those are probably bugs somewhere else in the kernel driver.
5358 	 *
5359 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5360 	 * GDS to 0 for this ring (me/pipe).
5361 	 */
5362 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5363 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5364 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5365 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5366 	}
5367 
5368 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5369 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5370 	amdgpu_ring_write(ring,
5371 #ifdef __BIG_ENDIAN
5372 				(2 << 0) |
5373 #endif
5374 				lower_32_bits(ib->gpu_addr));
5375 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5376 	amdgpu_ring_write(ring, control);
5377 }
5378 
5379 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5380 				     u64 seq, unsigned flags)
5381 {
5382 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5383 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5384 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5385 
5386 	/* RELEASE_MEM - flush caches, send int */
5387 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5388 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5389 					       EOP_TC_NC_ACTION_EN) :
5390 					      (EOP_TCL1_ACTION_EN |
5391 					       EOP_TC_ACTION_EN |
5392 					       EOP_TC_WB_ACTION_EN |
5393 					       EOP_TC_MD_ACTION_EN)) |
5394 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5395 				 EVENT_INDEX(5)));
5396 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5397 
5398 	/*
5399 	 * the address should be Qword aligned if 64bit write, Dword
5400 	 * aligned if only send 32bit data low (discard data high)
5401 	 */
5402 	if (write64bit)
5403 		BUG_ON(addr & 0x7);
5404 	else
5405 		BUG_ON(addr & 0x3);
5406 	amdgpu_ring_write(ring, lower_32_bits(addr));
5407 	amdgpu_ring_write(ring, upper_32_bits(addr));
5408 	amdgpu_ring_write(ring, lower_32_bits(seq));
5409 	amdgpu_ring_write(ring, upper_32_bits(seq));
5410 	amdgpu_ring_write(ring, 0);
5411 }
5412 
5413 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5414 {
5415 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5416 	uint32_t seq = ring->fence_drv.sync_seq;
5417 	uint64_t addr = ring->fence_drv.gpu_addr;
5418 
5419 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5420 			      lower_32_bits(addr), upper_32_bits(addr),
5421 			      seq, 0xffffffff, 4);
5422 }
5423 
5424 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5425 					unsigned vmid, uint64_t pd_addr)
5426 {
5427 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5428 
5429 	/* compute doesn't have PFP */
5430 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5431 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5432 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5433 		amdgpu_ring_write(ring, 0x0);
5434 	}
5435 }
5436 
5437 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5438 {
5439 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5440 }
5441 
5442 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5443 {
5444 	u64 wptr;
5445 
5446 	/* XXX check if swapping is necessary on BE */
5447 	if (ring->use_doorbell)
5448 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5449 	else
5450 		BUG();
5451 	return wptr;
5452 }
5453 
5454 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5455 {
5456 	struct amdgpu_device *adev = ring->adev;
5457 
5458 	/* XXX check if swapping is necessary on BE */
5459 	if (ring->use_doorbell) {
5460 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5461 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5462 	} else{
5463 		BUG(); /* only DOORBELL method supported on gfx9 now */
5464 	}
5465 }
5466 
5467 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5468 					 u64 seq, unsigned int flags)
5469 {
5470 	struct amdgpu_device *adev = ring->adev;
5471 
5472 	/* we only allocate 32bit for each seq wb address */
5473 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5474 
5475 	/* write fence seq to the "addr" */
5476 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5477 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5478 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5479 	amdgpu_ring_write(ring, lower_32_bits(addr));
5480 	amdgpu_ring_write(ring, upper_32_bits(addr));
5481 	amdgpu_ring_write(ring, lower_32_bits(seq));
5482 
5483 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5484 		/* set register to trigger INT */
5485 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5486 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5487 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5488 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5489 		amdgpu_ring_write(ring, 0);
5490 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5491 	}
5492 }
5493 
5494 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5495 {
5496 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5497 	amdgpu_ring_write(ring, 0);
5498 }
5499 
5500 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5501 {
5502 	struct v9_ce_ib_state ce_payload = {0};
5503 	uint64_t csa_addr;
5504 	int cnt;
5505 
5506 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5507 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5508 
5509 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5510 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5511 				 WRITE_DATA_DST_SEL(8) |
5512 				 WR_CONFIRM) |
5513 				 WRITE_DATA_CACHE_POLICY(0));
5514 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5515 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5516 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5517 }
5518 
5519 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5520 {
5521 	struct v9_de_ib_state de_payload = {0};
5522 	uint64_t csa_addr, gds_addr;
5523 	int cnt;
5524 
5525 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5526 	gds_addr = csa_addr + 4096;
5527 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5528 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5529 
5530 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5531 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5532 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5533 				 WRITE_DATA_DST_SEL(8) |
5534 				 WR_CONFIRM) |
5535 				 WRITE_DATA_CACHE_POLICY(0));
5536 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5537 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5538 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5539 }
5540 
5541 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5542 				   bool secure)
5543 {
5544 	uint32_t v = secure ? FRAME_TMZ : 0;
5545 
5546 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5547 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5548 }
5549 
5550 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5551 {
5552 	uint32_t dw2 = 0;
5553 
5554 	if (amdgpu_sriov_vf(ring->adev))
5555 		gfx_v9_0_ring_emit_ce_meta(ring);
5556 
5557 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5558 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5559 		/* set load_global_config & load_global_uconfig */
5560 		dw2 |= 0x8001;
5561 		/* set load_cs_sh_regs */
5562 		dw2 |= 0x01000000;
5563 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5564 		dw2 |= 0x10002;
5565 
5566 		/* set load_ce_ram if preamble presented */
5567 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5568 			dw2 |= 0x10000000;
5569 	} else {
5570 		/* still load_ce_ram if this is the first time preamble presented
5571 		 * although there is no context switch happens.
5572 		 */
5573 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5574 			dw2 |= 0x10000000;
5575 	}
5576 
5577 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5578 	amdgpu_ring_write(ring, dw2);
5579 	amdgpu_ring_write(ring, 0);
5580 }
5581 
5582 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5583 {
5584 	unsigned ret;
5585 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5586 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5587 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5588 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5589 	ret = ring->wptr & ring->buf_mask;
5590 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5591 	return ret;
5592 }
5593 
5594 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5595 {
5596 	unsigned cur;
5597 	BUG_ON(offset > ring->buf_mask);
5598 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5599 
5600 	cur = (ring->wptr & ring->buf_mask) - 1;
5601 	if (likely(cur > offset))
5602 		ring->ring[offset] = cur - offset;
5603 	else
5604 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5605 }
5606 
5607 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5608 				    uint32_t reg_val_offs)
5609 {
5610 	struct amdgpu_device *adev = ring->adev;
5611 
5612 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5613 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5614 				(5 << 8) |	/* dst: memory */
5615 				(1 << 20));	/* write confirm */
5616 	amdgpu_ring_write(ring, reg);
5617 	amdgpu_ring_write(ring, 0);
5618 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5619 				reg_val_offs * 4));
5620 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5621 				reg_val_offs * 4));
5622 }
5623 
5624 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5625 				    uint32_t val)
5626 {
5627 	uint32_t cmd = 0;
5628 
5629 	switch (ring->funcs->type) {
5630 	case AMDGPU_RING_TYPE_GFX:
5631 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5632 		break;
5633 	case AMDGPU_RING_TYPE_KIQ:
5634 		cmd = (1 << 16); /* no inc addr */
5635 		break;
5636 	default:
5637 		cmd = WR_CONFIRM;
5638 		break;
5639 	}
5640 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5641 	amdgpu_ring_write(ring, cmd);
5642 	amdgpu_ring_write(ring, reg);
5643 	amdgpu_ring_write(ring, 0);
5644 	amdgpu_ring_write(ring, val);
5645 }
5646 
5647 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5648 					uint32_t val, uint32_t mask)
5649 {
5650 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5651 }
5652 
5653 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5654 						  uint32_t reg0, uint32_t reg1,
5655 						  uint32_t ref, uint32_t mask)
5656 {
5657 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5658 	struct amdgpu_device *adev = ring->adev;
5659 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5660 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5661 
5662 	if (fw_version_ok)
5663 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5664 				      ref, mask, 0x20);
5665 	else
5666 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5667 							   ref, mask);
5668 }
5669 
5670 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5671 {
5672 	struct amdgpu_device *adev = ring->adev;
5673 	uint32_t value = 0;
5674 
5675 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5676 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5677 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5678 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5679 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5680 }
5681 
5682 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5683 						 enum amdgpu_interrupt_state state)
5684 {
5685 	switch (state) {
5686 	case AMDGPU_IRQ_STATE_DISABLE:
5687 	case AMDGPU_IRQ_STATE_ENABLE:
5688 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5689 			       TIME_STAMP_INT_ENABLE,
5690 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5691 		break;
5692 	default:
5693 		break;
5694 	}
5695 }
5696 
5697 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5698 						     int me, int pipe,
5699 						     enum amdgpu_interrupt_state state)
5700 {
5701 	u32 mec_int_cntl, mec_int_cntl_reg;
5702 
5703 	/*
5704 	 * amdgpu controls only the first MEC. That's why this function only
5705 	 * handles the setting of interrupts for this specific MEC. All other
5706 	 * pipes' interrupts are set by amdkfd.
5707 	 */
5708 
5709 	if (me == 1) {
5710 		switch (pipe) {
5711 		case 0:
5712 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5713 			break;
5714 		case 1:
5715 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5716 			break;
5717 		case 2:
5718 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5719 			break;
5720 		case 3:
5721 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5722 			break;
5723 		default:
5724 			DRM_DEBUG("invalid pipe %d\n", pipe);
5725 			return;
5726 		}
5727 	} else {
5728 		DRM_DEBUG("invalid me %d\n", me);
5729 		return;
5730 	}
5731 
5732 	switch (state) {
5733 	case AMDGPU_IRQ_STATE_DISABLE:
5734 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
5735 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5736 					     TIME_STAMP_INT_ENABLE, 0);
5737 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5738 		break;
5739 	case AMDGPU_IRQ_STATE_ENABLE:
5740 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5741 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5742 					     TIME_STAMP_INT_ENABLE, 1);
5743 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5744 		break;
5745 	default:
5746 		break;
5747 	}
5748 }
5749 
5750 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5751 					     struct amdgpu_irq_src *source,
5752 					     unsigned type,
5753 					     enum amdgpu_interrupt_state state)
5754 {
5755 	switch (state) {
5756 	case AMDGPU_IRQ_STATE_DISABLE:
5757 	case AMDGPU_IRQ_STATE_ENABLE:
5758 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5759 			       PRIV_REG_INT_ENABLE,
5760 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5761 		break;
5762 	default:
5763 		break;
5764 	}
5765 
5766 	return 0;
5767 }
5768 
5769 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5770 					      struct amdgpu_irq_src *source,
5771 					      unsigned type,
5772 					      enum amdgpu_interrupt_state state)
5773 {
5774 	switch (state) {
5775 	case AMDGPU_IRQ_STATE_DISABLE:
5776 	case AMDGPU_IRQ_STATE_ENABLE:
5777 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5778 			       PRIV_INSTR_INT_ENABLE,
5779 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5780 		break;
5781 	default:
5782 		break;
5783 	}
5784 
5785 	return 0;
5786 }
5787 
5788 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5789 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5790 			CP_ECC_ERROR_INT_ENABLE, 1)
5791 
5792 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5793 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5794 			CP_ECC_ERROR_INT_ENABLE, 0)
5795 
5796 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5797 					      struct amdgpu_irq_src *source,
5798 					      unsigned type,
5799 					      enum amdgpu_interrupt_state state)
5800 {
5801 	switch (state) {
5802 	case AMDGPU_IRQ_STATE_DISABLE:
5803 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5804 				CP_ECC_ERROR_INT_ENABLE, 0);
5805 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5806 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5807 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5808 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5809 		break;
5810 
5811 	case AMDGPU_IRQ_STATE_ENABLE:
5812 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5813 				CP_ECC_ERROR_INT_ENABLE, 1);
5814 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5815 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5816 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5817 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5818 		break;
5819 	default:
5820 		break;
5821 	}
5822 
5823 	return 0;
5824 }
5825 
5826 
5827 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5828 					    struct amdgpu_irq_src *src,
5829 					    unsigned type,
5830 					    enum amdgpu_interrupt_state state)
5831 {
5832 	switch (type) {
5833 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5834 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5835 		break;
5836 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5837 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5838 		break;
5839 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5840 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5841 		break;
5842 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5843 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5844 		break;
5845 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5846 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5847 		break;
5848 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5849 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5850 		break;
5851 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5852 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5853 		break;
5854 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5855 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5856 		break;
5857 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5858 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5859 		break;
5860 	default:
5861 		break;
5862 	}
5863 	return 0;
5864 }
5865 
5866 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5867 			    struct amdgpu_irq_src *source,
5868 			    struct amdgpu_iv_entry *entry)
5869 {
5870 	int i;
5871 	u8 me_id, pipe_id, queue_id;
5872 	struct amdgpu_ring *ring;
5873 
5874 	DRM_DEBUG("IH: CP EOP\n");
5875 	me_id = (entry->ring_id & 0x0c) >> 2;
5876 	pipe_id = (entry->ring_id & 0x03) >> 0;
5877 	queue_id = (entry->ring_id & 0x70) >> 4;
5878 
5879 	switch (me_id) {
5880 	case 0:
5881 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5882 		break;
5883 	case 1:
5884 	case 2:
5885 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5886 			ring = &adev->gfx.compute_ring[i];
5887 			/* Per-queue interrupt is supported for MEC starting from VI.
5888 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5889 			  */
5890 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5891 				amdgpu_fence_process(ring);
5892 		}
5893 		break;
5894 	}
5895 	return 0;
5896 }
5897 
5898 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5899 			   struct amdgpu_iv_entry *entry)
5900 {
5901 	u8 me_id, pipe_id, queue_id;
5902 	struct amdgpu_ring *ring;
5903 	int i;
5904 
5905 	me_id = (entry->ring_id & 0x0c) >> 2;
5906 	pipe_id = (entry->ring_id & 0x03) >> 0;
5907 	queue_id = (entry->ring_id & 0x70) >> 4;
5908 
5909 	switch (me_id) {
5910 	case 0:
5911 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5912 		break;
5913 	case 1:
5914 	case 2:
5915 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5916 			ring = &adev->gfx.compute_ring[i];
5917 			if (ring->me == me_id && ring->pipe == pipe_id &&
5918 			    ring->queue == queue_id)
5919 				drm_sched_fault(&ring->sched);
5920 		}
5921 		break;
5922 	}
5923 }
5924 
/*
 * Privileged-register fault interrupt handler: log the error and forward
 * @entry to gfx_v9_0_fault(). @source is not used. Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5933 
/*
 * Privileged-instruction fault interrupt handler: log the error and
 * forward @entry to gfx_v9_0_fault(). @source is not used. Always
 * returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5942 
5943 
5944 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5945 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5946 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5947 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5948 	},
5949 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5950 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5951 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5952 	},
5953 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5954 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5955 	  0, 0
5956 	},
5957 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5958 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5959 	  0, 0
5960 	},
5961 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5962 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5963 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5964 	},
5965 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5966 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5967 	  0, 0
5968 	},
5969 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5970 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5971 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5972 	},
5973 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5974 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5975 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5976 	},
5977 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5978 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5979 	  0, 0
5980 	},
5981 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5982 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5983 	  0, 0
5984 	},
5985 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5986 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5987 	  0, 0
5988 	},
5989 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5990 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5991 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5992 	},
5993 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5994 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5995 	  0, 0
5996 	},
5997 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5998 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5999 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6000 	},
6001 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6002 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6003 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6004 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6005 	},
6006 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6007 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6008 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6009 	  0, 0
6010 	},
6011 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6012 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6013 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6014 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6015 	},
6016 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6017 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6018 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6019 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6020 	},
6021 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6022 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6023 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6024 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6025 	},
6026 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6027 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6028 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6029 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6030 	},
6031 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6032 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6033 	  0, 0
6034 	},
6035 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6036 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6037 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6038 	},
6039 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6040 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6041 	  0, 0
6042 	},
6043 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6044 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6045 	  0, 0
6046 	},
6047 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6048 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6049 	  0, 0
6050 	},
6051 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6052 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6053 	  0, 0
6054 	},
6055 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6056 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6057 	  0, 0
6058 	},
6059 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6060 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6061 	  0, 0
6062 	},
6063 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6064 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6065 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6066 	},
6067 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6068 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6069 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6070 	},
6071 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6072 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6073 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6074 	},
6075 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6076 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6077 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6078 	},
6079 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6080 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6081 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6082 	},
6083 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6084 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6085 	  0, 0
6086 	},
6087 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6088 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6089 	  0, 0
6090 	},
6091 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6092 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6096 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6097 	  0, 0
6098 	},
6099 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6100 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6101 	  0, 0
6102 	},
6103 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6104 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6105 	  0, 0
6106 	},
6107 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6108 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6109 	  0, 0
6110 	},
6111 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6112 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6113 	  0, 0
6114 	},
6115 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6116 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6117 	  0, 0
6118 	},
6119 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6120 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6121 	  0, 0
6122 	},
6123 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6124 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6125 	  0, 0
6126 	},
6127 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6128 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6129 	  0, 0
6130 	},
6131 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6132 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6133 	  0, 0
6134 	},
6135 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6136 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6137 	  0, 0
6138 	},
6139 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6140 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6141 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6142 	},
6143 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6144 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6145 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6146 	},
6147 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6148 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6149 	  0, 0
6150 	},
6151 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6152 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6153 	  0, 0
6154 	},
6155 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6156 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6157 	  0, 0
6158 	},
6159 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6160 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6161 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6162 	},
6163 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6164 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6165 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6166 	},
6167 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6168 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6169 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6170 	},
6171 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6172 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6173 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6174 	},
6175 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6176 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6177 	  0, 0
6178 	},
6179 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6180 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6181 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6182 	},
6183 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6184 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6185 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6186 	},
6187 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6188 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6189 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6190 	},
6191 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6192 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6193 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6194 	},
6195 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6196 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6197 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6198 	},
6199 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6200 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6201 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6202 	},
6203 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6204 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6205 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6206 	},
6207 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6208 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6209 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6210 	},
6211 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6212 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6213 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6214 	},
6215 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6216 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6217 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6218 	},
6219 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6220 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6221 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6222 	},
6223 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6224 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6225 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6226 	},
6227 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6228 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6229 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6230 	},
6231 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6232 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6233 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6234 	},
6235 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6236 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6237 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6238 	},
6239 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6240 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6241 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6242 	},
6243 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6244 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6245 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6246 	},
6247 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6248 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6249 	  0, 0
6250 	},
6251 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6252 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6253 	  0, 0
6254 	},
6255 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6256 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6257 	  0, 0
6258 	},
6259 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6260 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6261 	  0, 0
6262 	},
6263 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6264 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6265 	  0, 0
6266 	},
6267 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6268 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6270 	},
6271 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6272 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6274 	},
6275 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6276 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6278 	},
6279 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6280 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6281 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6282 	},
6283 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6284 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6285 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6286 	},
6287 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6288 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6289 	  0, 0
6290 	},
6291 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6292 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6293 	  0, 0
6294 	},
6295 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6296 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6297 	  0, 0
6298 	},
6299 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6300 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6301 	  0, 0
6302 	},
6303 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6304 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6305 	  0, 0
6306 	},
6307 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6308 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6309 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6310 	},
6311 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6312 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6313 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6314 	},
6315 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6316 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6317 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6318 	},
6319 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6320 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6321 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6322 	},
6323 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6324 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6325 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6326 	},
6327 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6328 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6329 	  0, 0
6330 	},
6331 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6332 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6333 	  0, 0
6334 	},
6335 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6336 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6337 	  0, 0
6338 	},
6339 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6340 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6341 	  0, 0
6342 	},
6343 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6344 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6345 	  0, 0
6346 	},
6347 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6348 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6350 	},
6351 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6352 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6354 	},
6355 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6356 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6358 	},
6359 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6360 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6361 	  0, 0
6362 	},
6363 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6364 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6365 	  0, 0
6366 	},
6367 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6368 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6369 	  0, 0
6370 	},
6371 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6372 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6373 	  0, 0
6374 	},
6375 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6376 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6377 	  0, 0
6378 	},
6379 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6380 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6381 	  0, 0
6382 	}
6383 };
6384 
6385 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6386 				     void *inject_if)
6387 {
6388 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6389 	int ret;
6390 	struct ta_ras_trigger_error_input block_info = { 0 };
6391 
6392 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6393 		return -EINVAL;
6394 
6395 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6396 		return -EINVAL;
6397 
6398 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6399 		return -EPERM;
6400 
6401 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6402 	      info->head.type)) {
6403 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6404 			ras_gfx_subblocks[info->head.sub_block_index].name,
6405 			info->head.type);
6406 		return -EPERM;
6407 	}
6408 
6409 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6410 	      info->head.type)) {
6411 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6412 			ras_gfx_subblocks[info->head.sub_block_index].name,
6413 			info->head.type);
6414 		return -EPERM;
6415 	}
6416 
6417 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6418 	block_info.sub_block_index =
6419 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6420 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6421 	block_info.address = info->address;
6422 	block_info.value = info->value;
6423 
6424 	mutex_lock(&adev->grbm_idx_mutex);
6425 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6426 	mutex_unlock(&adev->grbm_idx_mutex);
6427 
6428 	return ret;
6429 }
6430 
/*
 * Names of the VM L2 memories, indexed by the value written to
 * mmVM_L2_MEM_ECC_INDEX when the per-memory ECC counters are read.
 * Both the strings and the pointer array are const so the whole table is
 * read-only.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6449 
/*
 * Names of the VM L2 walker memories.
 * NOTE(review): presumably indexed by the value written to
 * mmVM_L2_WALKER_MEM_ECC_INDEX; the user of this table is outside this
 * excerpt.
 * Both the strings and the pointer array are const so the whole table is
 * read-only.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6459 
/*
 * Names of the ATC L2 2M cache memories.
 * NOTE(review): presumably indexed by the value written to
 * mmATC_L2_CACHE_2M_EDC_INDEX; the user of this table is outside this
 * excerpt.
 * Both the strings and the pointer array are const so the whole table is
 * read-only.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6466 
/*
 * Names of the ATC L2 4K cache memories (2 banks x 2 ways x 8 memories).
 * NOTE(review): presumably indexed by the value written to
 * mmATC_L2_CACHE_4K_EDC_INDEX; the user of this table is outside this
 * excerpt.
 * Both the strings and the pointer array are const so the whole table is
 * read-only.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6501 
6502 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6503 					 struct ras_err_data *err_data)
6504 {
6505 	uint32_t i, data;
6506 	uint32_t sec_count, ded_count;
6507 
6508 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6509 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6510 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6511 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6512 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6513 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6514 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6515 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6516 
6517 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6518 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6519 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6520 
6521 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6522 		if (sec_count) {
6523 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6524 				"SEC %d\n", i, vml2_mems[i], sec_count);
6525 			err_data->ce_count += sec_count;
6526 		}
6527 
6528 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6529 		if (ded_count) {
6530 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6531 				"DED %d\n", i, vml2_mems[i], ded_count);
6532 			err_data->ue_count += ded_count;
6533 		}
6534 	}
6535 
6536 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6537 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6538 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6539 
6540 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6541 						SEC_COUNT);
6542 		if (sec_count) {
6543 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6544 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6545 			err_data->ce_count += sec_count;
6546 		}
6547 
6548 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6549 						DED_COUNT);
6550 		if (ded_count) {
6551 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6552 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6553 			err_data->ue_count += ded_count;
6554 		}
6555 	}
6556 
6557 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6558 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6559 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6560 
6561 		sec_count = (data & 0x00006000L) >> 0xd;
6562 		if (sec_count) {
6563 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6564 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6565 				sec_count);
6566 			err_data->ce_count += sec_count;
6567 		}
6568 	}
6569 
6570 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6571 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6572 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6573 
6574 		sec_count = (data & 0x00006000L) >> 0xd;
6575 		if (sec_count) {
6576 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6577 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6578 				sec_count);
6579 			err_data->ce_count += sec_count;
6580 		}
6581 
6582 		ded_count = (data & 0x00018000L) >> 0xf;
6583 		if (ded_count) {
6584 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6585 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6586 				ded_count);
6587 			err_data->ue_count += ded_count;
6588 		}
6589 	}
6590 
6591 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6592 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6593 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6594 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6595 
6596 	return 0;
6597 }
6598 
6599 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6600 	const struct soc15_reg_entry *reg,
6601 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6602 	uint32_t *sec_count, uint32_t *ded_count)
6603 {
6604 	uint32_t i;
6605 	uint32_t sec_cnt, ded_cnt;
6606 
6607 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6608 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6609 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6610 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6611 			continue;
6612 
6613 		sec_cnt = (value &
6614 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6615 				gfx_v9_0_ras_fields[i].sec_count_shift;
6616 		if (sec_cnt) {
6617 			dev_info(adev->dev, "GFX SubBlock %s, "
6618 				"Instance[%d][%d], SEC %d\n",
6619 				gfx_v9_0_ras_fields[i].name,
6620 				se_id, inst_id,
6621 				sec_cnt);
6622 			*sec_count += sec_cnt;
6623 		}
6624 
6625 		ded_cnt = (value &
6626 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6627 				gfx_v9_0_ras_fields[i].ded_count_shift;
6628 		if (ded_cnt) {
6629 			dev_info(adev->dev, "GFX SubBlock %s, "
6630 				"Instance[%d][%d], DED %d\n",
6631 				gfx_v9_0_ras_fields[i].name,
6632 				se_id, inst_id,
6633 				ded_cnt);
6634 			*ded_count += ded_cnt;
6635 		}
6636 	}
6637 
6638 	return 0;
6639 }
6640 
6641 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6642 {
6643 	int i, j, k;
6644 
6645 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6646 		return;
6647 
6648 	/* read back registers to clear the counters */
6649 	mutex_lock(&adev->grbm_idx_mutex);
6650 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6651 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6652 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6653 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6654 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6655 			}
6656 		}
6657 	}
6658 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6659 	mutex_unlock(&adev->grbm_idx_mutex);
6660 
6661 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6662 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6663 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6664 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6665 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6666 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6667 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6668 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6669 
6670 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6671 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6672 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6673 	}
6674 
6675 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6676 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6677 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6678 	}
6679 
6680 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6681 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6682 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6683 	}
6684 
6685 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6686 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6687 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6688 	}
6689 
6690 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6691 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6692 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6693 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6694 }
6695 
6696 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6697 					  void *ras_error_status)
6698 {
6699 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6700 	uint32_t sec_count = 0, ded_count = 0;
6701 	uint32_t i, j, k;
6702 	uint32_t reg_value;
6703 
6704 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6705 		return;
6706 
6707 	err_data->ue_count = 0;
6708 	err_data->ce_count = 0;
6709 
6710 	mutex_lock(&adev->grbm_idx_mutex);
6711 
6712 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6713 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6714 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6715 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6716 				reg_value =
6717 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6718 				if (reg_value)
6719 					gfx_v9_0_ras_error_count(adev,
6720 						&gfx_v9_0_edc_counter_regs[i],
6721 						j, k, reg_value,
6722 						&sec_count, &ded_count);
6723 			}
6724 		}
6725 	}
6726 
6727 	err_data->ce_count += sec_count;
6728 	err_data->ue_count += ded_count;
6729 
6730 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6731 	mutex_unlock(&adev->grbm_idx_mutex);
6732 
6733 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6734 }
6735 
6736 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6737 {
6738 	const unsigned int cp_coher_cntl =
6739 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6740 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6741 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6742 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6743 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6744 
6745 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6746 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6747 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6748 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6749 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6750 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6751 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6752 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6753 }
6754 
6755 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6756 					uint32_t pipe, bool enable)
6757 {
6758 	struct amdgpu_device *adev = ring->adev;
6759 	uint32_t val;
6760 	uint32_t wcl_cs_reg;
6761 
6762 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6763 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6764 
6765 	switch (pipe) {
6766 	case 0:
6767 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6768 		break;
6769 	case 1:
6770 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6771 		break;
6772 	case 2:
6773 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6774 		break;
6775 	case 3:
6776 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6777 		break;
6778 	default:
6779 		DRM_DEBUG("invalid pipe %d\n", pipe);
6780 		return;
6781 	}
6782 
6783 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6784 
6785 }
6786 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6787 {
6788 	struct amdgpu_device *adev = ring->adev;
6789 	uint32_t val;
6790 	int i;
6791 
6792 
6793 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6794 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6795 	 * around 25% of gpu resources.
6796 	 */
6797 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6798 	amdgpu_ring_emit_wreg(ring,
6799 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6800 			      val);
6801 
6802 	/* Restrict waves for normal/low priority compute queues as well
6803 	 * to get best QoS for high priority compute jobs.
6804 	 *
6805 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6806 	 */
6807 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6808 		if (i != ring->pipe)
6809 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6810 
6811 	}
6812 }
6813 
/*
 * IP-level callback table for the GFX v9.0 block (init/fini, suspend/resume,
 * idle handling, soft reset and clock/power gating). It is referenced through
 * the .funcs member of gfx_v9_0_ip_block.
 */
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
6831 
/*
 * Ring callback table for AMDGPU_RING_TYPE_GFX rings; installed on every
 * adev->gfx.gfx_ring[] entry by gfx_v9_0_set_ring_funcs().
 *
 * .emit_frame_size is an itemised sum: each addend is the size reserved for
 * the packet named in its trailing comment.
 */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7, /* gfx_v9_0_emit_mem_sync */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
6885 
/*
 * Ring callback table for AMDGPU_RING_TYPE_COMPUTE rings; installed on every
 * adev->gfx.compute_ring[] entry by gfx_v9_0_set_ring_funcs().
 *
 * Of the three ring tables in this file it is the only one that sets
 * .emit_wave_limit; the last two addends of .emit_frame_size reserve room for
 * the register writes emitted by gfx_v9_0_emit_wave_limit().
 */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
};
6924 
/*
 * Ring callback table for the AMDGPU_RING_TYPE_KIQ ring; installed on
 * adev->gfx.kiq.ring by gfx_v9_0_set_ring_funcs().
 *
 * It reuses the compute rptr/wptr accessors, uses the KIQ-specific fence
 * emitter and is the only ring table in this file that provides .emit_rreg.
 * No .emit_ib callback is set even though .emit_ib_size is.
 */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
6953 
6954 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6955 {
6956 	int i;
6957 
6958 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6959 
6960 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6961 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6962 
6963 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6964 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6965 }
6966 
/* Callbacks for adev->gfx.eop_irq; installed by gfx_v9_0_set_irq_funcs(). */
static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};
6971 
/* Callbacks for adev->gfx.priv_reg_irq; installed by gfx_v9_0_set_irq_funcs(). */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};
6976 
/* Callbacks for adev->gfx.priv_inst_irq; installed by gfx_v9_0_set_irq_funcs(). */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};
6981 
/*
 * Callbacks for adev->gfx.cp_ecc_error_irq; installed by
 * gfx_v9_0_set_irq_funcs(). The handler is the common
 * amdgpu_gfx_cp_ecc_error_irq rather than a gfx_v9_0-specific one.
 */
static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};
6986 
6987 
6988 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6989 {
6990 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6991 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6992 
6993 	adev->gfx.priv_reg_irq.num_types = 1;
6994 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6995 
6996 	adev->gfx.priv_inst_irq.num_types = 1;
6997 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6998 
6999 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7000 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7001 }
7002 
7003 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7004 {
7005 	switch (adev->ip_versions[GC_HWIP][0]) {
7006 	case IP_VERSION(9, 0, 1):
7007 	case IP_VERSION(9, 2, 1):
7008 	case IP_VERSION(9, 4, 0):
7009 	case IP_VERSION(9, 2, 2):
7010 	case IP_VERSION(9, 1, 0):
7011 	case IP_VERSION(9, 4, 1):
7012 	case IP_VERSION(9, 3, 0):
7013 	case IP_VERSION(9, 4, 2):
7014 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7015 		break;
7016 	default:
7017 		break;
7018 	}
7019 }
7020 
7021 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7022 {
7023 	/* init asci gds info */
7024 	switch (adev->ip_versions[GC_HWIP][0]) {
7025 	case IP_VERSION(9, 0, 1):
7026 	case IP_VERSION(9, 2, 1):
7027 	case IP_VERSION(9, 4, 0):
7028 		adev->gds.gds_size = 0x10000;
7029 		break;
7030 	case IP_VERSION(9, 2, 2):
7031 	case IP_VERSION(9, 1, 0):
7032 	case IP_VERSION(9, 4, 1):
7033 		adev->gds.gds_size = 0x1000;
7034 		break;
7035 	case IP_VERSION(9, 4, 2):
7036 		/* aldebaran removed all the GDS internal memory,
7037 		 * only support GWS opcode in kernel, like barrier
7038 		 * semaphore.etc */
7039 		adev->gds.gds_size = 0;
7040 		break;
7041 	default:
7042 		adev->gds.gds_size = 0x10000;
7043 		break;
7044 	}
7045 
7046 	switch (adev->ip_versions[GC_HWIP][0]) {
7047 	case IP_VERSION(9, 0, 1):
7048 	case IP_VERSION(9, 4, 0):
7049 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7050 		break;
7051 	case IP_VERSION(9, 2, 1):
7052 		adev->gds.gds_compute_max_wave_id = 0x27f;
7053 		break;
7054 	case IP_VERSION(9, 2, 2):
7055 	case IP_VERSION(9, 1, 0):
7056 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7057 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7058 		else
7059 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7060 		break;
7061 	case IP_VERSION(9, 4, 1):
7062 		adev->gds.gds_compute_max_wave_id = 0xfff;
7063 		break;
7064 	case IP_VERSION(9, 4, 2):
7065 		/* deprecated for Aldebaran, no usage at all */
7066 		adev->gds.gds_compute_max_wave_id = 0;
7067 		break;
7068 	default:
7069 		/* this really depends on the chip */
7070 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7071 		break;
7072 	}
7073 
7074 	adev->gds.gws_size = 64;
7075 	adev->gds.oa_size = 16;
7076 }
7077 
7078 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7079 						 u32 bitmap)
7080 {
7081 	u32 data;
7082 
7083 	if (!bitmap)
7084 		return;
7085 
7086 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7087 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7088 
7089 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7090 }
7091 
7092 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7093 {
7094 	u32 data, mask;
7095 
7096 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7097 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7098 
7099 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7100 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7101 
7102 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7103 
7104 	return (~data) & mask;
7105 }
7106 
7107 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7108 				 struct amdgpu_cu_info *cu_info)
7109 {
7110 	int i, j, k, counter, active_cu_number = 0;
7111 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7112 	unsigned disable_masks[4 * 4];
7113 
7114 	if (!adev || !cu_info)
7115 		return -EINVAL;
7116 
7117 	/*
7118 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7119 	 */
7120 	if (adev->gfx.config.max_shader_engines *
7121 		adev->gfx.config.max_sh_per_se > 16)
7122 		return -EINVAL;
7123 
7124 	amdgpu_gfx_parse_disable_cu(disable_masks,
7125 				    adev->gfx.config.max_shader_engines,
7126 				    adev->gfx.config.max_sh_per_se);
7127 
7128 	mutex_lock(&adev->grbm_idx_mutex);
7129 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7130 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7131 			mask = 1;
7132 			ao_bitmap = 0;
7133 			counter = 0;
7134 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7135 			gfx_v9_0_set_user_cu_inactive_bitmap(
7136 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7137 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7138 
7139 			/*
7140 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7141 			 * 4x4 size array, and it's usually suitable for Vega
7142 			 * ASICs which has 4*2 SE/SH layout.
7143 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7144 			 * To mostly reduce the impact, we make it compatible
7145 			 * with current bitmap array as below:
7146 			 *    SE4,SH0 --> bitmap[0][1]
7147 			 *    SE5,SH0 --> bitmap[1][1]
7148 			 *    SE6,SH0 --> bitmap[2][1]
7149 			 *    SE7,SH0 --> bitmap[3][1]
7150 			 */
7151 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7152 
7153 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7154 				if (bitmap & mask) {
7155 					if (counter < adev->gfx.config.max_cu_per_sh)
7156 						ao_bitmap |= mask;
7157 					counter ++;
7158 				}
7159 				mask <<= 1;
7160 			}
7161 			active_cu_number += counter;
7162 			if (i < 2 && j < 2)
7163 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7164 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7165 		}
7166 	}
7167 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7168 	mutex_unlock(&adev->grbm_idx_mutex);
7169 
7170 	cu_info->number = active_cu_number;
7171 	cu_info->ao_cu_mask = ao_cu_mask;
7172 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7173 
7174 	return 0;
7175 }
7176 
/*
 * IP block descriptor for GFX v9.0: type AMD_IP_BLOCK_TYPE_GFX, version
 * 9.0.0, driven by the gfx_v9_0_ip_funcs callback table. Unlike the other
 * tables in this part of the file it is not static.
 */
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
7185