xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 8aaaf2f3af2ae212428f4db1af34214225f5cec3)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
/*
 * Driver-local constants for this file.
 * NOTE(review): units are not stated here; GFX9_MEC_HPD_SIZE is presumably a
 * byte count - confirm at the point of use.
 * mmGCEA_PROBE_MAP / mmGCEA_PROBE_MAP_BASE_IDX follow the register-offset +
 * _BASE_IDX naming pattern and are referenced by golden_settings_gc_9_1_rn
 * further down in this file.
 */
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
/*
 * Firmware image names declared through MODULE_FIRMWARE(), grouped by the
 * ASIC name prefix of the file. The groups are not uniform: most list
 * ce/pfp/me/mec/mec2/rlc, while renoir has no mec2 entry, arcturus lists only
 * mec and rlc, aldebaran lists mec, mec2 and rlc, and the picasso and raven2
 * groups carry one extra rlc variant each (picasso_rlc_am4, raven_kicker_rlc).
 */
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
/*
 * Register offsets carrying an ASIC-name suffix (_ARCT, _Renoir), each paired
 * with a matching _BASE_IDX definition. The TCP_CHAN_STEER offsets are not
 * contiguous: 0 and 1 sit at 0x0b03/0x0b04, 2..5 at 0x0b09..0x0b0c.
 * NOTE(review): presumably these are the ASIC-specific locations of registers
 * that differ from the generic gc_9_0 headers - confirm.
 */
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
144 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
145 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
146 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
147 
/*
 * Sub-block indices for the GFX RAS block, numbered sequentially from 0.
 * Members are grouped per hardware unit (CPC, CPF, CPG, GDS, SQ, SQC, TA,
 * TCA, TCC, TCP, TD, EA); every group carries *_INDEX_START / *_INDEX_END
 * aliases equal to its first and last member, and SQC, TCC and EA are
 * additionally split into numbered sub-ranges (INDEX0, INDEX1, ...).
 * SPI_SR_MEM, TCI_WRITE_RAM and the four UTC entries stand alone without
 * START/END aliases. TA_RAS_BLOCK__GFX_MAX is one past the last sub-block.
 * NOTE(review): the TA_ prefix suggests these values are shared with the RAS
 * trusted application and must not be renumbered - confirm.
 */
148 enum ta_ras_gfx_subblock {
149 	/*CPC*/
150 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 	TA_RAS_BLOCK__GFX_CPC_UCODE,
153 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
160 	/* CPF*/
161 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 	TA_RAS_BLOCK__GFX_CPF_TAG,
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
166 	/* CPG*/
167 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 	TA_RAS_BLOCK__GFX_CPG_TAG,
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
172 	/* GDS*/
173 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
180 	/* SPI*/
181 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
182 	/* SQ*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
189 	/* SQC (3 ranges)*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	/* SQC range 0*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	/* SQC range 1*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	/* SQC range 2*/
218 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
232 	/* TA*/
233 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
240 	/* TCA*/
241 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 	/* TCC (5 sub-ranges)*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	/* TCC range 0*/
248 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
258 	/* TCC range 1*/
259 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	/* TCC range 2*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	/* TCC range 3*/
277 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	/* TCC range 4*/
283 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
290 	/* TCI*/
291 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
292 	/* TCP*/
293 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
302 	/* TD*/
303 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 	/* EA (3 sub-ranges)*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	/* EA range 0*/
311 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
321 	/* EA range 1*/
322 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
331 	/* EA range 2*/
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
339 	/* UTC VM L2 bank*/
340 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
341 	/* UTC VM walker*/
342 	TA_RAS_BLOCK__UTC_VML2_WALKER,
343 	/* UTC ATC L2 2MB cache*/
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 	/* UTC ATC L2 4KB cache*/
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
347 	TA_RAS_BLOCK__GFX_MAX
348 };
349 
/*
 * One row of the ras_gfx_subblocks[] table; rows are produced by the
 * AMDGPU_RAS_SUB_BLOCK() macro, which fills the fields positionally:
 *   name                    - stringified sub-block identifier
 *   ta_subblock             - the matching TA_RAS_BLOCK__* enumerator
 *   hw_supported_error_type - bit mask packed from macro flags a..d
 *   sw_supported_error_type - bit mask packed from macro flags e..h
 * NOTE(review): the meaning of the individual mask bits is not defined in
 * this part of the file - confirm against the RAS error-type definitions.
 */
350 struct ras_gfx_subblock {
351 	unsigned char *name;
352 	int ta_subblock;
353 	int hw_supported_error_type;
354 	int sw_supported_error_type;
355 };
356 
/*
 * Emit one designated initializer for a struct ras_gfx_subblock array, placed
 * at index AMDGPU_RAS_BLOCK__<subblock>. The entry stores the stringified
 * sub-block name, the TA_RAS_BLOCK__<subblock> value and two packed masks:
 *   third field  = a (bit 0) | b (bit 1) | c (bit 2) | d (bit 3)
 *   fourth field = g (bit 0) | e (bit 1) | h (bit 2) | f (bit 3)
 * Note that e..h are NOT packed in argument order; keep that in mind when
 * reading or editing table rows.
 */
357 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
358 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
359 		#subblock,                                                     \
360 		TA_RAS_BLOCK__##subblock,                                      \
361 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
362 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
363 	}
364 
/*
 * Per-sub-block RAS capability table. Each row is placed by designated
 * initializer at AMDGPU_RAS_BLOCK__<name> (defined outside this section), so
 * the textual order of the rows does not determine their index. In every row
 * the first four flags are packed into hw_supported_error_type and the last
 * four into sw_supported_error_type; see AMDGPU_RAS_SUB_BLOCK for the exact
 * bit positions.
 */
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 			     0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 			     1),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 			     0, 0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 			     0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 			     0, 0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 			     0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 			     0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 			     0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 			     0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514 
/*
 * Golden register table named for GC 9.0. Every row hands an IP name (GC),
 * an instance number (0), a register name and two 32-bit constants to
 * SOC15_REG_GOLDEN_VALUE().
 * NOTE(review): the two constants are presumably an AND mask followed by the
 * value to program; the macro is defined outside this section - confirm.
 */
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538 
/*
 * Golden register table with the _vg10 suffix; rows use the same
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, constant, constant) shape as
 * the other golden tables in this file.
 * NOTE(review): presumably the Vega10-specific additions to the GC 9.0
 * settings (name-based) - confirm where the table is applied.
 */
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560 
/*
 * Golden register table with the _vg20 suffix; rows use the same
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, constant, constant) shape as
 * the other golden tables in this file.
 * NOTE(review): presumably the Vega20-specific additions to the GC 9.0
 * settings (name-based) - confirm where the table is applied.
 */
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575 
/*
 * Golden register table named for GC 9.1; rows use the same
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, constant, constant) shape as
 * the other golden tables in this file.
 * NOTE(review): which ASICs consume this table is decided outside this
 * section - confirm at the point of use.
 */
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603 
/*
 * Golden register table with the _rv1 suffix; rows use the same
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, constant, constant) shape as
 * the other golden tables in this file.
 * NOTE(review): presumably the first Raven variant's additions to the GC 9.1
 * settings (name-based) - confirm where the table is applied.
 */
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614 
/*
 * Golden register table with the _rv2 suffix; rows use the same
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, constant, constant) shape as
 * the other golden tables in this file. Unlike golden_settings_gc_9_1_rv1 it
 * also lists the *_UTCL1_CNTL and TCP_CHAN_STEER registers itself.
 * NOTE(review): presumably the second Raven variant's settings (name-based) -
 * confirm where the table is applied.
 */
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
616 {
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
636 };
637 
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
639 {
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
652 };
653 
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
655 {
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
659 };
660 
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
662 {
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
679 };
680 
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
682 {
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
696 };
697 
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
699 {
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
711 };
712 
713 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
714 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
715 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
716 };
717 
718 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
719 {
720 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 };
729 
730 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
731 {
732 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 };
741 
742 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
743 {
744 	static void *scratch_reg0;
745 	static void *scratch_reg1;
746 	static void *scratch_reg2;
747 	static void *scratch_reg3;
748 	static void *spare_int;
749 	static uint32_t grbm_cntl;
750 	static uint32_t grbm_idx;
751 
752 	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
753 	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
754 	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
755 	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
756 	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
757 
758 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
759 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
760 
761 	if (amdgpu_sriov_runtime(adev)) {
762 		pr_err("shouldn't call rlcg write register during runtime\n");
763 		return;
764 	}
765 
766 	if (offset == grbm_cntl || offset == grbm_idx) {
767 		if (offset  == grbm_cntl)
768 			writel(v, scratch_reg2);
769 		else if (offset == grbm_idx)
770 			writel(v, scratch_reg3);
771 
772 		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
773 	} else {
774 		uint32_t i = 0;
775 		uint32_t retries = 50000;
776 
777 		writel(v, scratch_reg0);
778 		writel(offset | 0x80000000, scratch_reg1);
779 		writel(1, spare_int);
780 		for (i = 0; i < retries; i++) {
781 			u32 tmp;
782 
783 			tmp = readl(scratch_reg1);
784 			if (!(tmp & 0x80000000))
785 				break;
786 
787 			udelay(10);
788 		}
789 		if (i >= retries)
790 			pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
791 	}
792 
793 }
794 
795 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
796 			       u32 v, u32 acc_flags, u32 hwip)
797 {
798 	if ((acc_flags & AMDGPU_REGS_RLC) &&
799 	    amdgpu_sriov_fullaccess(adev)) {
800 		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
801 
802 		return;
803 	}
804 
805 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
806 		WREG32_NO_KIQ(offset, v);
807 	else
808 		WREG32(offset, v);
809 }
810 
811 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
812 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
813 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
814 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
815 
816 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
817 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
818 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
819 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
820 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
821 				struct amdgpu_cu_info *cu_info);
822 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
823 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
824 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
825 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
826 					  void *ras_error_status);
827 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
828 				     void *inject_if);
829 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
830 
831 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
832 				uint64_t queue_mask)
833 {
834 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
835 	amdgpu_ring_write(kiq_ring,
836 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
837 		/* vmid_mask:0* queue_type:0 (KIQ) */
838 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
839 	amdgpu_ring_write(kiq_ring,
840 			lower_32_bits(queue_mask));	/* queue mask lo */
841 	amdgpu_ring_write(kiq_ring,
842 			upper_32_bits(queue_mask));	/* queue mask hi */
843 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
844 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
845 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
846 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
847 }
848 
849 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
850 				 struct amdgpu_ring *ring)
851 {
852 	struct amdgpu_device *adev = kiq_ring->adev;
853 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
854 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
855 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
856 
857 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
858 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
859 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
860 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
861 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
862 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
863 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
864 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
865 			 /*queue_type: normal compute queue */
866 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
867 			 /* alloc format: all_on_one_pipe */
868 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
869 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
870 			 /* num_queues: must be 1 */
871 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
872 	amdgpu_ring_write(kiq_ring,
873 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
874 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
875 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
876 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
877 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
878 }
879 
880 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
881 				   struct amdgpu_ring *ring,
882 				   enum amdgpu_unmap_queues_action action,
883 				   u64 gpu_addr, u64 seq)
884 {
885 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
886 
887 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
888 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
889 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
890 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
891 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
892 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
893 	amdgpu_ring_write(kiq_ring,
894 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
895 
896 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
897 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
898 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
899 		amdgpu_ring_write(kiq_ring, seq);
900 	} else {
901 		amdgpu_ring_write(kiq_ring, 0);
902 		amdgpu_ring_write(kiq_ring, 0);
903 		amdgpu_ring_write(kiq_ring, 0);
904 	}
905 }
906 
907 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
908 				   struct amdgpu_ring *ring,
909 				   u64 addr,
910 				   u64 seq)
911 {
912 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
913 
914 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
915 	amdgpu_ring_write(kiq_ring,
916 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
917 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
918 			  PACKET3_QUERY_STATUS_COMMAND(2));
919 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
920 	amdgpu_ring_write(kiq_ring,
921 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
922 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
923 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
924 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
925 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
926 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
927 }
928 
929 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
930 				uint16_t pasid, uint32_t flush_type,
931 				bool all_hub)
932 {
933 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
934 	amdgpu_ring_write(kiq_ring,
935 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
936 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
937 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
938 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
939 }
940 
941 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
942 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
943 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
944 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
945 	.kiq_query_status = gfx_v9_0_kiq_query_status,
946 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
947 	.set_resources_size = 8,
948 	.map_queues_size = 7,
949 	.unmap_queues_size = 6,
950 	.query_status_size = 7,
951 	.invalidate_tlbs_size = 2,
952 };
953 
954 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
955 {
956 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
957 }
958 
959 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
960 {
961 	switch (adev->ip_versions[GC_HWIP][0]) {
962 	case IP_VERSION(9, 0, 1):
963 		soc15_program_register_sequence(adev,
964 						golden_settings_gc_9_0,
965 						ARRAY_SIZE(golden_settings_gc_9_0));
966 		soc15_program_register_sequence(adev,
967 						golden_settings_gc_9_0_vg10,
968 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
969 		break;
970 	case IP_VERSION(9, 2, 1):
971 		soc15_program_register_sequence(adev,
972 						golden_settings_gc_9_2_1,
973 						ARRAY_SIZE(golden_settings_gc_9_2_1));
974 		soc15_program_register_sequence(adev,
975 						golden_settings_gc_9_2_1_vg12,
976 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
977 		break;
978 	case IP_VERSION(9, 4, 0):
979 		soc15_program_register_sequence(adev,
980 						golden_settings_gc_9_0,
981 						ARRAY_SIZE(golden_settings_gc_9_0));
982 		soc15_program_register_sequence(adev,
983 						golden_settings_gc_9_0_vg20,
984 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
985 		break;
986 	case IP_VERSION(9, 4, 1):
987 		soc15_program_register_sequence(adev,
988 						golden_settings_gc_9_4_1_arct,
989 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
990 		break;
991 	case IP_VERSION(9, 2, 2):
992 	case IP_VERSION(9, 1, 0):
993 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
994 						ARRAY_SIZE(golden_settings_gc_9_1));
995 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
996 			soc15_program_register_sequence(adev,
997 							golden_settings_gc_9_1_rv2,
998 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
999 		else
1000 			soc15_program_register_sequence(adev,
1001 							golden_settings_gc_9_1_rv1,
1002 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1003 		break;
1004 	 case IP_VERSION(9, 3, 0):
1005 		soc15_program_register_sequence(adev,
1006 						golden_settings_gc_9_1_rn,
1007 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1008 		return; /* for renoir, don't need common goldensetting */
1009 	case IP_VERSION(9, 4, 2):
1010 		gfx_v9_4_2_init_golden_registers(adev,
1011 						 adev->smuio.funcs->get_die_id(adev));
1012 		break;
1013 	default:
1014 		break;
1015 	}
1016 
1017 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1018 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
1019 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1020 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1021 }
1022 
1023 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1024 {
1025 	adev->gfx.scratch.num_reg = 8;
1026 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1027 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1028 }
1029 
1030 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1031 				       bool wc, uint32_t reg, uint32_t val)
1032 {
1033 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1034 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1035 				WRITE_DATA_DST_SEL(0) |
1036 				(wc ? WR_CONFIRM : 0));
1037 	amdgpu_ring_write(ring, reg);
1038 	amdgpu_ring_write(ring, 0);
1039 	amdgpu_ring_write(ring, val);
1040 }
1041 
1042 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1043 				  int mem_space, int opt, uint32_t addr0,
1044 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1045 				  uint32_t inv)
1046 {
1047 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1048 	amdgpu_ring_write(ring,
1049 				 /* memory (1) or register (0) */
1050 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1051 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1052 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1053 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1054 
1055 	if (mem_space)
1056 		BUG_ON(addr0 & 0x3); /* Dword align */
1057 	amdgpu_ring_write(ring, addr0);
1058 	amdgpu_ring_write(ring, addr1);
1059 	amdgpu_ring_write(ring, ref);
1060 	amdgpu_ring_write(ring, mask);
1061 	amdgpu_ring_write(ring, inv); /* poll interval */
1062 }
1063 
1064 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1065 {
1066 	struct amdgpu_device *adev = ring->adev;
1067 	uint32_t scratch;
1068 	uint32_t tmp = 0;
1069 	unsigned i;
1070 	int r;
1071 
1072 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1073 	if (r)
1074 		return r;
1075 
1076 	WREG32(scratch, 0xCAFEDEAD);
1077 	r = amdgpu_ring_alloc(ring, 3);
1078 	if (r)
1079 		goto error_free_scratch;
1080 
1081 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1082 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1083 	amdgpu_ring_write(ring, 0xDEADBEEF);
1084 	amdgpu_ring_commit(ring);
1085 
1086 	for (i = 0; i < adev->usec_timeout; i++) {
1087 		tmp = RREG32(scratch);
1088 		if (tmp == 0xDEADBEEF)
1089 			break;
1090 		udelay(1);
1091 	}
1092 
1093 	if (i >= adev->usec_timeout)
1094 		r = -ETIMEDOUT;
1095 
1096 error_free_scratch:
1097 	amdgpu_gfx_scratch_free(adev, scratch);
1098 	return r;
1099 }
1100 
1101 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1102 {
1103 	struct amdgpu_device *adev = ring->adev;
1104 	struct amdgpu_ib ib;
1105 	struct dma_fence *f = NULL;
1106 
1107 	unsigned index;
1108 	uint64_t gpu_addr;
1109 	uint32_t tmp;
1110 	long r;
1111 
1112 	r = amdgpu_device_wb_get(adev, &index);
1113 	if (r)
1114 		return r;
1115 
1116 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1117 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1118 	memset(&ib, 0, sizeof(ib));
1119 	r = amdgpu_ib_get(adev, NULL, 16,
1120 					AMDGPU_IB_POOL_DIRECT, &ib);
1121 	if (r)
1122 		goto err1;
1123 
1124 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1125 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1126 	ib.ptr[2] = lower_32_bits(gpu_addr);
1127 	ib.ptr[3] = upper_32_bits(gpu_addr);
1128 	ib.ptr[4] = 0xDEADBEEF;
1129 	ib.length_dw = 5;
1130 
1131 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1132 	if (r)
1133 		goto err2;
1134 
1135 	r = dma_fence_wait_timeout(f, false, timeout);
1136 	if (r == 0) {
1137 		r = -ETIMEDOUT;
1138 		goto err2;
1139 	} else if (r < 0) {
1140 		goto err2;
1141 	}
1142 
1143 	tmp = adev->wb.wb[index];
1144 	if (tmp == 0xDEADBEEF)
1145 		r = 0;
1146 	else
1147 		r = -EINVAL;
1148 
1149 err2:
1150 	amdgpu_ib_free(adev, &ib, NULL);
1151 	dma_fence_put(f);
1152 err1:
1153 	amdgpu_device_wb_free(adev, index);
1154 	return r;
1155 }
1156 
1157 
1158 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1159 {
1160 	release_firmware(adev->gfx.pfp_fw);
1161 	adev->gfx.pfp_fw = NULL;
1162 	release_firmware(adev->gfx.me_fw);
1163 	adev->gfx.me_fw = NULL;
1164 	release_firmware(adev->gfx.ce_fw);
1165 	adev->gfx.ce_fw = NULL;
1166 	release_firmware(adev->gfx.rlc_fw);
1167 	adev->gfx.rlc_fw = NULL;
1168 	release_firmware(adev->gfx.mec_fw);
1169 	adev->gfx.mec_fw = NULL;
1170 	release_firmware(adev->gfx.mec2_fw);
1171 	adev->gfx.mec2_fw = NULL;
1172 
1173 	kfree(adev->gfx.rlc.register_list_format);
1174 }
1175 
1176 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1177 {
1178 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1179 
1180 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1181 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1182 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1183 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1184 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1185 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1186 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1187 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1188 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1189 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1190 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1191 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1192 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1193 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1194 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1195 }
1196 
1197 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1198 {
1199 	adev->gfx.me_fw_write_wait = false;
1200 	adev->gfx.mec_fw_write_wait = false;
1201 
1202 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1203 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1204 	    (adev->gfx.mec_feature_version < 46) ||
1205 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1206 	    (adev->gfx.pfp_feature_version < 46)))
1207 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1208 
1209 	switch (adev->ip_versions[GC_HWIP][0]) {
1210 	case IP_VERSION(9, 0, 1):
1211 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1212 		    (adev->gfx.me_feature_version >= 42) &&
1213 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1214 		    (adev->gfx.pfp_feature_version >= 42))
1215 			adev->gfx.me_fw_write_wait = true;
1216 
1217 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1218 		    (adev->gfx.mec_feature_version >= 42))
1219 			adev->gfx.mec_fw_write_wait = true;
1220 		break;
1221 	case IP_VERSION(9, 2, 1):
1222 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1223 		    (adev->gfx.me_feature_version >= 44) &&
1224 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1225 		    (adev->gfx.pfp_feature_version >= 44))
1226 			adev->gfx.me_fw_write_wait = true;
1227 
1228 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1229 		    (adev->gfx.mec_feature_version >= 44))
1230 			adev->gfx.mec_fw_write_wait = true;
1231 		break;
1232 	case IP_VERSION(9, 4, 0):
1233 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1234 		    (adev->gfx.me_feature_version >= 44) &&
1235 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1236 		    (adev->gfx.pfp_feature_version >= 44))
1237 			adev->gfx.me_fw_write_wait = true;
1238 
1239 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1240 		    (adev->gfx.mec_feature_version >= 44))
1241 			adev->gfx.mec_fw_write_wait = true;
1242 		break;
1243 	case IP_VERSION(9, 1, 0):
1244 	case IP_VERSION(9, 2, 2):
1245 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1246 		    (adev->gfx.me_feature_version >= 42) &&
1247 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1248 		    (adev->gfx.pfp_feature_version >= 42))
1249 			adev->gfx.me_fw_write_wait = true;
1250 
1251 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1252 		    (adev->gfx.mec_feature_version >= 42))
1253 			adev->gfx.mec_fw_write_wait = true;
1254 		break;
1255 	default:
1256 		adev->gfx.me_fw_write_wait = true;
1257 		adev->gfx.mec_fw_write_wait = true;
1258 		break;
1259 	}
1260 }
1261 
1262 struct amdgpu_gfxoff_quirk {
1263 	u16 chip_vendor;
1264 	u16 chip_device;
1265 	u16 subsys_vendor;
1266 	u16 subsys_device;
1267 	u8 revision;
1268 };
1269 
1270 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1271 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1272 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1273 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1274 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1275 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1276 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1277 	{ 0, 0, 0, 0, 0 },
1278 };
1279 
1280 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1281 {
1282 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1283 
1284 	while (p && p->chip_device != 0) {
1285 		if (pdev->vendor == p->chip_vendor &&
1286 		    pdev->device == p->chip_device &&
1287 		    pdev->subsystem_vendor == p->subsys_vendor &&
1288 		    pdev->subsystem_device == p->subsys_device &&
1289 		    pdev->revision == p->revision) {
1290 			return true;
1291 		}
1292 		++p;
1293 	}
1294 	return false;
1295 }
1296 
1297 static bool is_raven_kicker(struct amdgpu_device *adev)
1298 {
1299 	if (adev->pm.fw_version >= 0x41e2b)
1300 		return true;
1301 	else
1302 		return false;
1303 }
1304 
1305 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1306 {
1307 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1308 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1309 	    (adev->gfx.me_feature_version >= 52))
1310 		return true;
1311 	else
1312 		return false;
1313 }
1314 
1315 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1316 {
1317 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1318 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1319 
1320 	switch (adev->ip_versions[GC_HWIP][0]) {
1321 	case IP_VERSION(9, 0, 1):
1322 	case IP_VERSION(9, 2, 1):
1323 	case IP_VERSION(9, 4, 0):
1324 		break;
1325 	case IP_VERSION(9, 2, 2):
1326 	case IP_VERSION(9, 1, 0):
1327 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1328 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1329 		    ((!is_raven_kicker(adev) &&
1330 		      adev->gfx.rlc_fw_version < 531) ||
1331 		     (adev->gfx.rlc_feature_version < 1) ||
1332 		     !adev->gfx.rlc.is_rlc_v2_1))
1333 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1334 
1335 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1336 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1337 				AMD_PG_SUPPORT_CP |
1338 				AMD_PG_SUPPORT_RLC_SMU_HS;
1339 		break;
1340 	case IP_VERSION(9, 3, 0):
1341 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1342 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1343 				AMD_PG_SUPPORT_CP |
1344 				AMD_PG_SUPPORT_RLC_SMU_HS;
1345 		break;
1346 	default:
1347 		break;
1348 	}
1349 }
1350 
1351 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1352 					  const char *chip_name)
1353 {
1354 	char fw_name[30];
1355 	int err;
1356 	struct amdgpu_firmware_info *info = NULL;
1357 	const struct common_firmware_header *header = NULL;
1358 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1359 
1360 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1361 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1362 	if (err)
1363 		goto out;
1364 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1365 	if (err)
1366 		goto out;
1367 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1368 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1369 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1370 
1371 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1372 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1373 	if (err)
1374 		goto out;
1375 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1376 	if (err)
1377 		goto out;
1378 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1379 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1380 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1381 
1382 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1383 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1384 	if (err)
1385 		goto out;
1386 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1387 	if (err)
1388 		goto out;
1389 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1390 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1391 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1392 
1393 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1394 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1395 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1396 		info->fw = adev->gfx.pfp_fw;
1397 		header = (const struct common_firmware_header *)info->fw->data;
1398 		adev->firmware.fw_size +=
1399 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1400 
1401 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1402 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1403 		info->fw = adev->gfx.me_fw;
1404 		header = (const struct common_firmware_header *)info->fw->data;
1405 		adev->firmware.fw_size +=
1406 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1407 
1408 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1409 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1410 		info->fw = adev->gfx.ce_fw;
1411 		header = (const struct common_firmware_header *)info->fw->data;
1412 		adev->firmware.fw_size +=
1413 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1414 	}
1415 
1416 out:
1417 	if (err) {
1418 		dev_err(adev->dev,
1419 			"gfx9: Failed to load firmware \"%s\"\n",
1420 			fw_name);
1421 		release_firmware(adev->gfx.pfp_fw);
1422 		adev->gfx.pfp_fw = NULL;
1423 		release_firmware(adev->gfx.me_fw);
1424 		adev->gfx.me_fw = NULL;
1425 		release_firmware(adev->gfx.ce_fw);
1426 		adev->gfx.ce_fw = NULL;
1427 	}
1428 	return err;
1429 }
1430 
1431 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1432 					  const char *chip_name)
1433 {
1434 	char fw_name[30];
1435 	int err;
1436 	struct amdgpu_firmware_info *info = NULL;
1437 	const struct common_firmware_header *header = NULL;
1438 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1439 	unsigned int *tmp = NULL;
1440 	unsigned int i = 0;
1441 	uint16_t version_major;
1442 	uint16_t version_minor;
1443 	uint32_t smu_version;
1444 
1445 	/*
1446 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1447 	 * instead of picasso_rlc.bin.
1448 	 * Judgment method:
1449 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1450 	 *          or revision >= 0xD8 && revision <= 0xDF
1451 	 * otherwise is PCO FP5
1452 	 */
1453 	if (!strcmp(chip_name, "picasso") &&
1454 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1455 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1456 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1457 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1458 		(smu_version >= 0x41e2b))
1459 		/**
1460 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1461 		*/
1462 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1463 	else
1464 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1465 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1466 	if (err)
1467 		goto out;
1468 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1469 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1470 
1471 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1472 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1473 	if (version_major == 2 && version_minor == 1)
1474 		adev->gfx.rlc.is_rlc_v2_1 = true;
1475 
1476 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1477 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1478 	adev->gfx.rlc.save_and_restore_offset =
1479 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1480 	adev->gfx.rlc.clear_state_descriptor_offset =
1481 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1482 	adev->gfx.rlc.avail_scratch_ram_locations =
1483 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1484 	adev->gfx.rlc.reg_restore_list_size =
1485 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1486 	adev->gfx.rlc.reg_list_format_start =
1487 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1488 	adev->gfx.rlc.reg_list_format_separate_start =
1489 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1490 	adev->gfx.rlc.starting_offsets_start =
1491 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1492 	adev->gfx.rlc.reg_list_format_size_bytes =
1493 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1494 	adev->gfx.rlc.reg_list_size_bytes =
1495 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1496 	adev->gfx.rlc.register_list_format =
1497 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1498 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1499 	if (!adev->gfx.rlc.register_list_format) {
1500 		err = -ENOMEM;
1501 		goto out;
1502 	}
1503 
1504 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1505 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1506 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1507 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1508 
1509 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1510 
1511 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1512 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1513 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1514 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1515 
1516 	if (adev->gfx.rlc.is_rlc_v2_1)
1517 		gfx_v9_0_init_rlc_ext_microcode(adev);
1518 
1519 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1520 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1521 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1522 		info->fw = adev->gfx.rlc_fw;
1523 		header = (const struct common_firmware_header *)info->fw->data;
1524 		adev->firmware.fw_size +=
1525 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1526 
1527 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1528 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1529 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1530 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1531 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1532 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1533 			info->fw = adev->gfx.rlc_fw;
1534 			adev->firmware.fw_size +=
1535 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1536 
1537 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1538 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1539 			info->fw = adev->gfx.rlc_fw;
1540 			adev->firmware.fw_size +=
1541 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1542 
1543 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1544 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1545 			info->fw = adev->gfx.rlc_fw;
1546 			adev->firmware.fw_size +=
1547 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1548 		}
1549 	}
1550 
1551 out:
1552 	if (err) {
1553 		dev_err(adev->dev,
1554 			"gfx9: Failed to load firmware \"%s\"\n",
1555 			fw_name);
1556 		release_firmware(adev->gfx.rlc_fw);
1557 		adev->gfx.rlc_fw = NULL;
1558 	}
1559 	return err;
1560 }
1561 
1562 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1563 {
1564 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1565 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1566 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1567 		return false;
1568 
1569 	return true;
1570 }
1571 
1572 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1573 					  const char *chip_name)
1574 {
1575 	char fw_name[30];
1576 	int err;
1577 	struct amdgpu_firmware_info *info = NULL;
1578 	const struct common_firmware_header *header = NULL;
1579 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1580 
1581 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1582 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1583 	if (err)
1584 		goto out;
1585 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1586 	if (err)
1587 		goto out;
1588 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1589 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1590 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1591 
1592 
1593 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1594 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1595 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1596 		if (!err) {
1597 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1598 			if (err)
1599 				goto out;
1600 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1601 			adev->gfx.mec2_fw->data;
1602 			adev->gfx.mec2_fw_version =
1603 			le32_to_cpu(cp_hdr->header.ucode_version);
1604 			adev->gfx.mec2_feature_version =
1605 			le32_to_cpu(cp_hdr->ucode_feature_version);
1606 		} else {
1607 			err = 0;
1608 			adev->gfx.mec2_fw = NULL;
1609 		}
1610 	} else {
1611 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1612 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1613 	}
1614 
1615 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1616 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1617 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1618 		info->fw = adev->gfx.mec_fw;
1619 		header = (const struct common_firmware_header *)info->fw->data;
1620 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1621 		adev->firmware.fw_size +=
1622 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1623 
1624 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1625 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1626 		info->fw = adev->gfx.mec_fw;
1627 		adev->firmware.fw_size +=
1628 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1629 
1630 		if (adev->gfx.mec2_fw) {
1631 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1632 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1633 			info->fw = adev->gfx.mec2_fw;
1634 			header = (const struct common_firmware_header *)info->fw->data;
1635 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1636 			adev->firmware.fw_size +=
1637 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1638 
1639 			/* TODO: Determine if MEC2 JT FW loading can be removed
1640 				 for all GFX V9 asic and above */
1641 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1642 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1643 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1644 				info->fw = adev->gfx.mec2_fw;
1645 				adev->firmware.fw_size +=
1646 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1647 					PAGE_SIZE);
1648 			}
1649 		}
1650 	}
1651 
1652 out:
1653 	gfx_v9_0_check_if_need_gfxoff(adev);
1654 	gfx_v9_0_check_fw_write_wait(adev);
1655 	if (err) {
1656 		dev_err(adev->dev,
1657 			"gfx9: Failed to load firmware \"%s\"\n",
1658 			fw_name);
1659 		release_firmware(adev->gfx.mec_fw);
1660 		adev->gfx.mec_fw = NULL;
1661 		release_firmware(adev->gfx.mec2_fw);
1662 		adev->gfx.mec2_fw = NULL;
1663 	}
1664 	return err;
1665 }
1666 
1667 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1668 {
1669 	const char *chip_name;
1670 	int r;
1671 
1672 	DRM_DEBUG("\n");
1673 
1674 	switch (adev->ip_versions[GC_HWIP][0]) {
1675 	case IP_VERSION(9, 0, 1):
1676 		chip_name = "vega10";
1677 		break;
1678 	case IP_VERSION(9, 2, 1):
1679 		chip_name = "vega12";
1680 		break;
1681 	case IP_VERSION(9, 4, 0):
1682 		chip_name = "vega20";
1683 		break;
1684 	case IP_VERSION(9, 2, 2):
1685 	case IP_VERSION(9, 1, 0):
1686 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1687 			chip_name = "raven2";
1688 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1689 			chip_name = "picasso";
1690 		else
1691 			chip_name = "raven";
1692 		break;
1693 	case IP_VERSION(9, 4, 1):
1694 		chip_name = "arcturus";
1695 		break;
1696 	case IP_VERSION(9, 3, 0):
1697 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1698 			chip_name = "renoir";
1699 		else
1700 			chip_name = "green_sardine";
1701 		break;
1702 	case IP_VERSION(9, 4, 2):
1703 		chip_name = "aldebaran";
1704 		break;
1705 	default:
1706 		BUG();
1707 	}
1708 
1709 	/* No CPG in Arcturus */
1710 	if (adev->gfx.num_gfx_rings) {
1711 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1712 		if (r)
1713 			return r;
1714 	}
1715 
1716 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1717 	if (r)
1718 		return r;
1719 
1720 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1721 	if (r)
1722 		return r;
1723 
1724 	return r;
1725 }
1726 
1727 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1728 {
1729 	u32 count = 0;
1730 	const struct cs_section_def *sect = NULL;
1731 	const struct cs_extent_def *ext = NULL;
1732 
1733 	/* begin clear state */
1734 	count += 2;
1735 	/* context control state */
1736 	count += 3;
1737 
1738 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1739 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1740 			if (sect->id == SECT_CONTEXT)
1741 				count += 2 + ext->reg_count;
1742 			else
1743 				return 0;
1744 		}
1745 	}
1746 
1747 	/* end clear state */
1748 	count += 2;
1749 	/* clear state */
1750 	count += 2;
1751 
1752 	return count;
1753 }
1754 
1755 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1756 				    volatile u32 *buffer)
1757 {
1758 	u32 count = 0, i;
1759 	const struct cs_section_def *sect = NULL;
1760 	const struct cs_extent_def *ext = NULL;
1761 
1762 	if (adev->gfx.rlc.cs_data == NULL)
1763 		return;
1764 	if (buffer == NULL)
1765 		return;
1766 
1767 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1768 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1769 
1770 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1771 	buffer[count++] = cpu_to_le32(0x80000000);
1772 	buffer[count++] = cpu_to_le32(0x80000000);
1773 
1774 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1775 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1776 			if (sect->id == SECT_CONTEXT) {
1777 				buffer[count++] =
1778 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1779 				buffer[count++] = cpu_to_le32(ext->reg_index -
1780 						PACKET3_SET_CONTEXT_REG_START);
1781 				for (i = 0; i < ext->reg_count; i++)
1782 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1783 			} else {
1784 				return;
1785 			}
1786 		}
1787 	}
1788 
1789 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1790 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1791 
1792 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1793 	buffer[count++] = cpu_to_le32(0);
1794 }
1795 
1796 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1797 {
1798 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1799 	uint32_t pg_always_on_cu_num = 2;
1800 	uint32_t always_on_cu_num;
1801 	uint32_t i, j, k;
1802 	uint32_t mask, cu_bitmap, counter;
1803 
1804 	if (adev->flags & AMD_IS_APU)
1805 		always_on_cu_num = 4;
1806 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1807 		always_on_cu_num = 8;
1808 	else
1809 		always_on_cu_num = 12;
1810 
1811 	mutex_lock(&adev->grbm_idx_mutex);
1812 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1813 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1814 			mask = 1;
1815 			cu_bitmap = 0;
1816 			counter = 0;
1817 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1818 
1819 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1820 				if (cu_info->bitmap[i][j] & mask) {
1821 					if (counter == pg_always_on_cu_num)
1822 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1823 					if (counter < always_on_cu_num)
1824 						cu_bitmap |= mask;
1825 					else
1826 						break;
1827 					counter++;
1828 				}
1829 				mask <<= 1;
1830 			}
1831 
1832 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1833 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1834 		}
1835 	}
1836 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1837 	mutex_unlock(&adev->grbm_idx_mutex);
1838 }
1839 
1840 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1841 {
1842 	uint32_t data;
1843 
1844 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1845 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1846 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1847 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1848 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1849 
1850 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1851 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1852 
1853 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1854 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1855 
1856 	mutex_lock(&adev->grbm_idx_mutex);
1857 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1858 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1859 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1860 
1861 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1862 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1863 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1864 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1865 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1866 
1867 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1868 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1869 	data &= 0x0000FFFF;
1870 	data |= 0x00C00000;
1871 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1872 
1873 	/*
1874 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1875 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1876 	 */
1877 
1878 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1879 	 * but used for RLC_LB_CNTL configuration */
1880 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1881 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1882 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1883 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1884 	mutex_unlock(&adev->grbm_idx_mutex);
1885 
1886 	gfx_v9_0_init_always_on_cu_mask(adev);
1887 }
1888 
1889 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1890 {
1891 	uint32_t data;
1892 
1893 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1894 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1895 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1896 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1897 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1898 
1899 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1900 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1901 
1902 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1903 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1904 
1905 	mutex_lock(&adev->grbm_idx_mutex);
1906 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1907 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1908 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1909 
1910 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1911 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1912 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1913 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1914 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1915 
1916 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1917 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1918 	data &= 0x0000FFFF;
1919 	data |= 0x00C00000;
1920 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1921 
1922 	/*
1923 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1924 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1925 	 */
1926 
1927 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1928 	 * but used for RLC_LB_CNTL configuration */
1929 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1930 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1931 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1932 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1933 	mutex_unlock(&adev->grbm_idx_mutex);
1934 
1935 	gfx_v9_0_init_always_on_cu_mask(adev);
1936 }
1937 
1938 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1939 {
1940 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1941 }
1942 
/*
 * Number of CP jump tables: 5 when a separate MEC2 firmware image is
 * supported on this ASIC, 4 otherwise.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1950 
1951 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1952 {
1953 	const struct cs_section_def *cs_data;
1954 	int r;
1955 
1956 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1957 
1958 	cs_data = adev->gfx.rlc.cs_data;
1959 
1960 	if (cs_data) {
1961 		/* init clear state block */
1962 		r = amdgpu_gfx_rlc_init_csb(adev);
1963 		if (r)
1964 			return r;
1965 	}
1966 
1967 	if (adev->flags & AMD_IS_APU) {
1968 		/* TODO: double check the cp_table_size for RV */
1969 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1970 		r = amdgpu_gfx_rlc_init_cpt(adev);
1971 		if (r)
1972 			return r;
1973 	}
1974 
1975 	switch (adev->ip_versions[GC_HWIP][0]) {
1976 	case IP_VERSION(9, 2, 2):
1977 	case IP_VERSION(9, 1, 0):
1978 		gfx_v9_0_init_lbpw(adev);
1979 		break;
1980 	case IP_VERSION(9, 4, 0):
1981 		gfx_v9_4_init_lbpw(adev);
1982 		break;
1983 	default:
1984 		break;
1985 	}
1986 
1987 	/* init spm vmid with 0xf */
1988 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1989 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1990 
1991 	return 0;
1992 }
1993 
1994 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1995 {
1996 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1997 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1998 }
1999 
2000 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
2001 {
2002 	int r;
2003 	u32 *hpd;
2004 	const __le32 *fw_data;
2005 	unsigned fw_size;
2006 	u32 *fw;
2007 	size_t mec_hpd_size;
2008 
2009 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2010 
2011 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2012 
2013 	/* take ownership of the relevant compute queues */
2014 	amdgpu_gfx_compute_queue_acquire(adev);
2015 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2016 	if (mec_hpd_size) {
2017 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2018 					      AMDGPU_GEM_DOMAIN_VRAM,
2019 					      &adev->gfx.mec.hpd_eop_obj,
2020 					      &adev->gfx.mec.hpd_eop_gpu_addr,
2021 					      (void **)&hpd);
2022 		if (r) {
2023 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
2024 			gfx_v9_0_mec_fini(adev);
2025 			return r;
2026 		}
2027 
2028 		memset(hpd, 0, mec_hpd_size);
2029 
2030 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2031 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2032 	}
2033 
2034 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2035 
2036 	fw_data = (const __le32 *)
2037 		(adev->gfx.mec_fw->data +
2038 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2039 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2040 
2041 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2042 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2043 				      &adev->gfx.mec.mec_fw_obj,
2044 				      &adev->gfx.mec.mec_fw_gpu_addr,
2045 				      (void **)&fw);
2046 	if (r) {
2047 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2048 		gfx_v9_0_mec_fini(adev);
2049 		return r;
2050 	}
2051 
2052 	memcpy(fw, fw_data, fw_size);
2053 
2054 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2055 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2056 
2057 	return 0;
2058 }
2059 
2060 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2061 {
2062 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2063 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2064 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2065 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2066 		(SQ_IND_INDEX__FORCE_READ_MASK));
2067 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2068 }
2069 
2070 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2071 			   uint32_t wave, uint32_t thread,
2072 			   uint32_t regno, uint32_t num, uint32_t *out)
2073 {
2074 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2075 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2076 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2077 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2078 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2079 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2080 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2081 	while (num--)
2082 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2083 }
2084 
2085 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2086 {
2087 	/* type 1 wave data */
2088 	dst[(*no_fields)++] = 1;
2089 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2090 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2091 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2092 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2093 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2094 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2095 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2096 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2097 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2098 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2099 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2100 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2101 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2102 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2103 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2104 }
2105 
2106 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2107 				     uint32_t wave, uint32_t start,
2108 				     uint32_t size, uint32_t *dst)
2109 {
2110 	wave_read_regs(
2111 		adev, simd, wave, 0,
2112 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2113 }
2114 
2115 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2116 				     uint32_t wave, uint32_t thread,
2117 				     uint32_t start, uint32_t size,
2118 				     uint32_t *dst)
2119 {
2120 	wave_read_regs(
2121 		adev, simd, wave, thread,
2122 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2123 }
2124 
2125 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2126 				  u32 me, u32 pipe, u32 q, u32 vm)
2127 {
2128 	soc15_grbm_select(adev, me, pipe, q, vm);
2129 }
2130 
2131 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2132         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2133         .select_se_sh = &gfx_v9_0_select_se_sh,
2134         .read_wave_data = &gfx_v9_0_read_wave_data,
2135         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2136         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2137         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2138 };
2139 
2140 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2141 	.ras_late_init = amdgpu_gfx_ras_late_init,
2142 	.ras_fini = amdgpu_gfx_ras_fini,
2143 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2144 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2145 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2146 };
2147 
2148 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2149 {
2150 	u32 gb_addr_config;
2151 	int err;
2152 
2153 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2154 
2155 	switch (adev->ip_versions[GC_HWIP][0]) {
2156 	case IP_VERSION(9, 0, 1):
2157 		adev->gfx.config.max_hw_contexts = 8;
2158 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2159 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2160 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2161 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2162 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2163 		break;
2164 	case IP_VERSION(9, 2, 1):
2165 		adev->gfx.config.max_hw_contexts = 8;
2166 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2167 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2168 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2169 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2170 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2171 		DRM_INFO("fix gfx.config for vega12\n");
2172 		break;
2173 	case IP_VERSION(9, 4, 0):
2174 		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2175 		adev->gfx.config.max_hw_contexts = 8;
2176 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2177 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2178 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2179 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2180 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2181 		gb_addr_config &= ~0xf3e777ff;
2182 		gb_addr_config |= 0x22014042;
2183 		/* check vbios table if gpu info is not available */
2184 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2185 		if (err)
2186 			return err;
2187 		break;
2188 	case IP_VERSION(9, 2, 2):
2189 	case IP_VERSION(9, 1, 0):
2190 		adev->gfx.config.max_hw_contexts = 8;
2191 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2192 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2193 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2194 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2195 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2196 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2197 		else
2198 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2199 		break;
2200 	case IP_VERSION(9, 4, 1):
2201 		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2202 		adev->gfx.config.max_hw_contexts = 8;
2203 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2204 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2205 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2206 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2207 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2208 		gb_addr_config &= ~0xf3e777ff;
2209 		gb_addr_config |= 0x22014042;
2210 		break;
2211 	case IP_VERSION(9, 3, 0):
2212 		adev->gfx.config.max_hw_contexts = 8;
2213 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2214 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2215 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2216 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2217 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2218 		gb_addr_config &= ~0xf3e777ff;
2219 		gb_addr_config |= 0x22010042;
2220 		break;
2221 	case IP_VERSION(9, 4, 2):
2222 		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2223 		adev->gfx.config.max_hw_contexts = 8;
2224 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2225 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2226 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2227 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2228 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2229 		gb_addr_config &= ~0xf3e777ff;
2230 		gb_addr_config |= 0x22014042;
2231 		/* check vbios table if gpu info is not available */
2232 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2233 		if (err)
2234 			return err;
2235 		break;
2236 	default:
2237 		BUG();
2238 		break;
2239 	}
2240 
2241 	adev->gfx.config.gb_addr_config = gb_addr_config;
2242 
2243 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2244 			REG_GET_FIELD(
2245 					adev->gfx.config.gb_addr_config,
2246 					GB_ADDR_CONFIG,
2247 					NUM_PIPES);
2248 
2249 	adev->gfx.config.max_tile_pipes =
2250 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2251 
2252 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2253 			REG_GET_FIELD(
2254 					adev->gfx.config.gb_addr_config,
2255 					GB_ADDR_CONFIG,
2256 					NUM_BANKS);
2257 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2258 			REG_GET_FIELD(
2259 					adev->gfx.config.gb_addr_config,
2260 					GB_ADDR_CONFIG,
2261 					MAX_COMPRESSED_FRAGS);
2262 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2263 			REG_GET_FIELD(
2264 					adev->gfx.config.gb_addr_config,
2265 					GB_ADDR_CONFIG,
2266 					NUM_RB_PER_SE);
2267 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2268 			REG_GET_FIELD(
2269 					adev->gfx.config.gb_addr_config,
2270 					GB_ADDR_CONFIG,
2271 					NUM_SHADER_ENGINES);
2272 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2273 			REG_GET_FIELD(
2274 					adev->gfx.config.gb_addr_config,
2275 					GB_ADDR_CONFIG,
2276 					PIPE_INTERLEAVE_SIZE));
2277 
2278 	return 0;
2279 }
2280 
2281 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2282 				      int mec, int pipe, int queue)
2283 {
2284 	unsigned irq_type;
2285 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2286 	unsigned int hw_prio;
2287 
2288 	ring = &adev->gfx.compute_ring[ring_id];
2289 
2290 	/* mec0 is me1 */
2291 	ring->me = mec + 1;
2292 	ring->pipe = pipe;
2293 	ring->queue = queue;
2294 
2295 	ring->ring_obj = NULL;
2296 	ring->use_doorbell = true;
2297 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2298 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2299 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2300 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2301 
2302 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2303 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2304 		+ ring->pipe;
2305 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2306 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2307 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2308 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2309 				hw_prio, NULL);
2310 }
2311 
2312 static int gfx_v9_0_sw_init(void *handle)
2313 {
2314 	int i, j, k, r, ring_id;
2315 	struct amdgpu_ring *ring;
2316 	struct amdgpu_kiq *kiq;
2317 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2318 
2319 	switch (adev->ip_versions[GC_HWIP][0]) {
2320 	case IP_VERSION(9, 0, 1):
2321 	case IP_VERSION(9, 2, 1):
2322 	case IP_VERSION(9, 4, 0):
2323 	case IP_VERSION(9, 2, 2):
2324 	case IP_VERSION(9, 1, 0):
2325 	case IP_VERSION(9, 4, 1):
2326 	case IP_VERSION(9, 3, 0):
2327 	case IP_VERSION(9, 4, 2):
2328 		adev->gfx.mec.num_mec = 2;
2329 		break;
2330 	default:
2331 		adev->gfx.mec.num_mec = 1;
2332 		break;
2333 	}
2334 
2335 	adev->gfx.mec.num_pipe_per_mec = 4;
2336 	adev->gfx.mec.num_queue_per_pipe = 8;
2337 
2338 	/* EOP Event */
2339 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2340 	if (r)
2341 		return r;
2342 
2343 	/* Privileged reg */
2344 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2345 			      &adev->gfx.priv_reg_irq);
2346 	if (r)
2347 		return r;
2348 
2349 	/* Privileged inst */
2350 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2351 			      &adev->gfx.priv_inst_irq);
2352 	if (r)
2353 		return r;
2354 
2355 	/* ECC error */
2356 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2357 			      &adev->gfx.cp_ecc_error_irq);
2358 	if (r)
2359 		return r;
2360 
2361 	/* FUE error */
2362 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2363 			      &adev->gfx.cp_ecc_error_irq);
2364 	if (r)
2365 		return r;
2366 
2367 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2368 
2369 	gfx_v9_0_scratch_init(adev);
2370 
2371 	r = gfx_v9_0_init_microcode(adev);
2372 	if (r) {
2373 		DRM_ERROR("Failed to load gfx firmware!\n");
2374 		return r;
2375 	}
2376 
2377 	r = adev->gfx.rlc.funcs->init(adev);
2378 	if (r) {
2379 		DRM_ERROR("Failed to init rlc BOs!\n");
2380 		return r;
2381 	}
2382 
2383 	r = gfx_v9_0_mec_init(adev);
2384 	if (r) {
2385 		DRM_ERROR("Failed to init MEC BOs!\n");
2386 		return r;
2387 	}
2388 
2389 	/* set up the gfx ring */
2390 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2391 		ring = &adev->gfx.gfx_ring[i];
2392 		ring->ring_obj = NULL;
2393 		if (!i)
2394 			sprintf(ring->name, "gfx");
2395 		else
2396 			sprintf(ring->name, "gfx_%d", i);
2397 		ring->use_doorbell = true;
2398 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2399 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2400 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2401 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2402 		if (r)
2403 			return r;
2404 	}
2405 
2406 	/* set up the compute queues - allocate horizontally across pipes */
2407 	ring_id = 0;
2408 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2409 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2410 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2411 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2412 					continue;
2413 
2414 				r = gfx_v9_0_compute_ring_init(adev,
2415 							       ring_id,
2416 							       i, k, j);
2417 				if (r)
2418 					return r;
2419 
2420 				ring_id++;
2421 			}
2422 		}
2423 	}
2424 
2425 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2426 	if (r) {
2427 		DRM_ERROR("Failed to init KIQ BOs!\n");
2428 		return r;
2429 	}
2430 
2431 	kiq = &adev->gfx.kiq;
2432 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2433 	if (r)
2434 		return r;
2435 
2436 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2437 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2438 	if (r)
2439 		return r;
2440 
2441 	adev->gfx.ce_ram_size = 0x8000;
2442 
2443 	r = gfx_v9_0_gpu_early_init(adev);
2444 	if (r)
2445 		return r;
2446 
2447 	return 0;
2448 }
2449 
2450 
2451 static int gfx_v9_0_sw_fini(void *handle)
2452 {
2453 	int i;
2454 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2455 
2456 	if (adev->gfx.ras_funcs &&
2457 	    adev->gfx.ras_funcs->ras_fini)
2458 		adev->gfx.ras_funcs->ras_fini(adev);
2459 
2460 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2461 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2462 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2463 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2464 
2465 	amdgpu_gfx_mqd_sw_fini(adev);
2466 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2467 	amdgpu_gfx_kiq_fini(adev);
2468 
2469 	gfx_v9_0_mec_fini(adev);
2470 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2471 				&adev->gfx.rlc.clear_state_gpu_addr,
2472 				(void **)&adev->gfx.rlc.cs_ptr);
2473 	if (adev->flags & AMD_IS_APU) {
2474 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2475 				&adev->gfx.rlc.cp_table_gpu_addr,
2476 				(void **)&adev->gfx.rlc.cp_table_ptr);
2477 	}
2478 	gfx_v9_0_free_microcode(adev);
2479 
2480 	return 0;
2481 }
2482 
2483 
/*
 * gfx_v9_0_tiling_mode_table_init - placeholder for tiling table setup
 *
 * Currently performs no work; @adev is unused.
 */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2488 
2489 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2490 			   u32 instance)
2491 {
2492 	u32 data;
2493 
2494 	if (instance == 0xffffffff)
2495 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2496 	else
2497 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2498 
2499 	if (se_num == 0xffffffff)
2500 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2501 	else
2502 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2503 
2504 	if (sh_num == 0xffffffff)
2505 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2506 	else
2507 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2508 
2509 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2510 }
2511 
2512 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2513 {
2514 	u32 data, mask;
2515 
2516 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2517 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2518 
2519 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2520 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2521 
2522 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2523 					 adev->gfx.config.max_sh_per_se);
2524 
2525 	return (~data) & mask;
2526 }
2527 
2528 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2529 {
2530 	int i, j;
2531 	u32 data;
2532 	u32 active_rbs = 0;
2533 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2534 					adev->gfx.config.max_sh_per_se;
2535 
2536 	mutex_lock(&adev->grbm_idx_mutex);
2537 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2538 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2539 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2540 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2541 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2542 					       rb_bitmap_width_per_sh);
2543 		}
2544 	}
2545 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2546 	mutex_unlock(&adev->grbm_idx_mutex);
2547 
2548 	adev->gfx.config.backend_enable_mask = active_rbs;
2549 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2550 }
2551 
2552 #define DEFAULT_SH_MEM_BASES	(0x6000)
2553 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2554 {
2555 	int i;
2556 	uint32_t sh_mem_config;
2557 	uint32_t sh_mem_bases;
2558 
2559 	/*
2560 	 * Configure apertures:
2561 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2562 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2563 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2564 	 */
2565 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2566 
2567 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2568 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2569 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2570 
2571 	mutex_lock(&adev->srbm_mutex);
2572 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2573 		soc15_grbm_select(adev, 0, 0, 0, i);
2574 		/* CP and shaders */
2575 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2576 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2577 	}
2578 	soc15_grbm_select(adev, 0, 0, 0, 0);
2579 	mutex_unlock(&adev->srbm_mutex);
2580 
2581 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2582 	   acccess. These should be enabled by FW for target VMIDs. */
2583 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2584 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2585 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2586 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2587 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2588 	}
2589 }
2590 
2591 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2592 {
2593 	int vmid;
2594 
2595 	/*
2596 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2597 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2598 	 * the driver can enable them for graphics. VMID0 should maintain
2599 	 * access so that HWS firmware can save/restore entries.
2600 	 */
2601 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2602 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2603 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2604 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2605 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2606 	}
2607 }
2608 
2609 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2610 {
2611 	uint32_t tmp;
2612 
2613 	switch (adev->ip_versions[GC_HWIP][0]) {
2614 	case IP_VERSION(9, 4, 1):
2615 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2616 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2617 					DISABLE_BARRIER_WAITCNT, 1);
2618 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2619 		break;
2620 	default:
2621 		break;
2622 	}
2623 }
2624 
2625 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2626 {
2627 	u32 tmp;
2628 	int i;
2629 
2630 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2631 
2632 	gfx_v9_0_tiling_mode_table_init(adev);
2633 
2634 	gfx_v9_0_setup_rb(adev);
2635 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2636 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2637 
2638 	/* XXX SH_MEM regs */
2639 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2640 	mutex_lock(&adev->srbm_mutex);
2641 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2642 		soc15_grbm_select(adev, 0, 0, 0, i);
2643 		/* CP and shaders */
2644 		if (i == 0) {
2645 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648 					    !!adev->gmc.noretry);
2649 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2651 		} else {
2652 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2653 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2654 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2655 					    !!adev->gmc.noretry);
2656 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2657 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2658 				(adev->gmc.private_aperture_start >> 48));
2659 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2660 				(adev->gmc.shared_aperture_start >> 48));
2661 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2662 		}
2663 	}
2664 	soc15_grbm_select(adev, 0, 0, 0, 0);
2665 
2666 	mutex_unlock(&adev->srbm_mutex);
2667 
2668 	gfx_v9_0_init_compute_vmid(adev);
2669 	gfx_v9_0_init_gds_vmid(adev);
2670 	gfx_v9_0_init_sq_config(adev);
2671 }
2672 
2673 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2674 {
2675 	u32 i, j, k;
2676 	u32 mask;
2677 
2678 	mutex_lock(&adev->grbm_idx_mutex);
2679 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2680 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2681 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2682 			for (k = 0; k < adev->usec_timeout; k++) {
2683 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2684 					break;
2685 				udelay(1);
2686 			}
2687 			if (k == adev->usec_timeout) {
2688 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2689 						      0xffffffff, 0xffffffff);
2690 				mutex_unlock(&adev->grbm_idx_mutex);
2691 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2692 					 i, j);
2693 				return;
2694 			}
2695 		}
2696 	}
2697 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2698 	mutex_unlock(&adev->grbm_idx_mutex);
2699 
2700 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2701 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2702 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2703 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2704 	for (k = 0; k < adev->usec_timeout; k++) {
2705 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2706 			break;
2707 		udelay(1);
2708 	}
2709 }
2710 
2711 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2712 					       bool enable)
2713 {
2714 	u32 tmp;
2715 
2716 	/* These interrupts should be enabled to drive DS clock */
2717 
2718 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2719 
2720 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2721 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2722 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2723 	if(adev->gfx.num_gfx_rings)
2724 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2725 
2726 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2727 }
2728 
2729 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2730 {
2731 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2732 	/* csib */
2733 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2734 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2735 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2736 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2737 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2738 			adev->gfx.rlc.clear_state_size);
2739 }
2740 
/**
 * gfx_v9_1_parse_ind_reg_list - scan the indirect part of the RLC list
 * @register_list_format: register list format words
 * @indirect_offset: index of the first word of the indirect portion
 * @list_size: total number of words in @register_list_format
 * @unique_indirect_regs: table of distinct indirect registers; a zero entry
 *                        marks a free slot and is filled in on first use
 * @unique_indirect_reg_count: number of slots in @unique_indirect_regs
 * @indirect_start_offsets: receives the start index of each indirect block
 * @indirect_start_offsets_count: in/out count of recorded start offsets
 * @max_start_offsets_count: capacity of @indirect_start_offsets
 *
 * Walks the list one block at a time. A block is a run of three-word
 * entries ended by a 0xFFFFFFFF word; the third word of each entry is
 * looked up in (or added to) @unique_indirect_regs. BUG()s if the unique
 * register table is full.
 *
 * If there are more blocks than @max_start_offsets_count, a warning is
 * raised and the extra start offsets are not recorded, so the array is
 * never written out of bounds. Parsing still continues so that
 * @unique_indirect_regs ends up complete.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* only record the block start while there is room for it */
		if (!WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count)) {
			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
		}

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words; the third is the register */
			indirect_offset += 2;

			/* look for the matching index, or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2777 
2778 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2779 {
2780 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2781 	int unique_indirect_reg_count = 0;
2782 
2783 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2784 	int indirect_start_offsets_count = 0;
2785 
2786 	int list_size = 0;
2787 	int i = 0, j = 0;
2788 	u32 tmp = 0;
2789 
2790 	u32 *register_list_format =
2791 		kmemdup(adev->gfx.rlc.register_list_format,
2792 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2793 	if (!register_list_format)
2794 		return -ENOMEM;
2795 
2796 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2797 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2798 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2799 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2800 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2801 				    unique_indirect_regs,
2802 				    unique_indirect_reg_count,
2803 				    indirect_start_offsets,
2804 				    &indirect_start_offsets_count,
2805 				    ARRAY_SIZE(indirect_start_offsets));
2806 
2807 	/* enable auto inc in case it is disabled */
2808 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2809 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2810 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2811 
2812 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2813 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2814 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2815 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2816 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2817 			adev->gfx.rlc.register_restore[i]);
2818 
2819 	/* load indirect register */
2820 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2821 		adev->gfx.rlc.reg_list_format_start);
2822 
2823 	/* direct register portion */
2824 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2825 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2826 			register_list_format[i]);
2827 
2828 	/* indirect register portion */
2829 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2830 		if (register_list_format[i] == 0xFFFFFFFF) {
2831 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2832 			continue;
2833 		}
2834 
2835 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2836 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2837 
2838 		for (j = 0; j < unique_indirect_reg_count; j++) {
2839 			if (register_list_format[i] == unique_indirect_regs[j]) {
2840 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2841 				break;
2842 			}
2843 		}
2844 
2845 		BUG_ON(j >= unique_indirect_reg_count);
2846 
2847 		i++;
2848 	}
2849 
2850 	/* set save/restore list size */
2851 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2852 	list_size = list_size >> 1;
2853 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2854 		adev->gfx.rlc.reg_restore_list_size);
2855 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2856 
2857 	/* write the starting offsets to RLC scratch ram */
2858 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2859 		adev->gfx.rlc.starting_offsets_start);
2860 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2861 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2862 		       indirect_start_offsets[i]);
2863 
2864 	/* load unique indirect regs*/
2865 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2866 		if (unique_indirect_regs[i] != 0) {
2867 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2868 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2869 			       unique_indirect_regs[i] & 0x3FFFF);
2870 
2871 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2872 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2873 			       unique_indirect_regs[i] >> 20);
2874 		}
2875 	}
2876 
2877 	kfree(register_list_format);
2878 	return 0;
2879 }
2880 
2881 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2882 {
2883 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2884 }
2885 
2886 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2887 					     bool enable)
2888 {
2889 	uint32_t data = 0;
2890 	uint32_t default_data = 0;
2891 
2892 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2893 	if (enable) {
2894 		/* enable GFXIP control over CGPG */
2895 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2896 		if(default_data != data)
2897 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2898 
2899 		/* update status */
2900 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2901 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2902 		if(default_data != data)
2903 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2904 	} else {
2905 		/* restore GFXIP control over GCPG */
2906 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2907 		if(default_data != data)
2908 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2909 	}
2910 }
2911 
2912 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2913 {
2914 	uint32_t data = 0;
2915 
2916 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2917 			      AMD_PG_SUPPORT_GFX_SMG |
2918 			      AMD_PG_SUPPORT_GFX_DMG)) {
2919 		/* init IDLE_POLL_COUNT = 60 */
2920 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2921 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2922 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2923 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2924 
2925 		/* init RLC PG Delay */
2926 		data = 0;
2927 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2928 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2929 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2930 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2931 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2932 
2933 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2934 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2935 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2936 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2937 
2938 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2939 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2940 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2941 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2942 
2943 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2944 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2945 
2946 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2947 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2948 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2949 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2950 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2951 	}
2952 }
2953 
2954 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2955 						bool enable)
2956 {
2957 	uint32_t data = 0;
2958 	uint32_t default_data = 0;
2959 
2960 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2961 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2962 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2963 			     enable ? 1 : 0);
2964 	if (default_data != data)
2965 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2966 }
2967 
2968 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2969 						bool enable)
2970 {
2971 	uint32_t data = 0;
2972 	uint32_t default_data = 0;
2973 
2974 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2975 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2976 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2977 			     enable ? 1 : 0);
2978 	if(default_data != data)
2979 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2980 }
2981 
2982 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2983 					bool enable)
2984 {
2985 	uint32_t data = 0;
2986 	uint32_t default_data = 0;
2987 
2988 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2989 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2990 			     CP_PG_DISABLE,
2991 			     enable ? 0 : 1);
2992 	if(default_data != data)
2993 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2994 }
2995 
2996 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2997 						bool enable)
2998 {
2999 	uint32_t data, default_data;
3000 
3001 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3002 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3003 			     GFX_POWER_GATING_ENABLE,
3004 			     enable ? 1 : 0);
3005 	if(default_data != data)
3006 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3007 }
3008 
3009 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3010 						bool enable)
3011 {
3012 	uint32_t data, default_data;
3013 
3014 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3015 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3016 			     GFX_PIPELINE_PG_ENABLE,
3017 			     enable ? 1 : 0);
3018 	if(default_data != data)
3019 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3020 
3021 	if (!enable)
3022 		/* read any GFX register to wake up GFX */
3023 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3024 }
3025 
3026 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3027 						       bool enable)
3028 {
3029 	uint32_t data, default_data;
3030 
3031 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3032 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3033 			     STATIC_PER_CU_PG_ENABLE,
3034 			     enable ? 1 : 0);
3035 	if(default_data != data)
3036 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3037 }
3038 
3039 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3040 						bool enable)
3041 {
3042 	uint32_t data, default_data;
3043 
3044 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3045 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3046 			     DYN_PER_CU_PG_ENABLE,
3047 			     enable ? 1 : 0);
3048 	if(default_data != data)
3049 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3050 }
3051 
3052 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3053 {
3054 	gfx_v9_0_init_csb(adev);
3055 
3056 	/*
3057 	 * Rlc save restore list is workable since v2_1.
3058 	 * And it's needed by gfxoff feature.
3059 	 */
3060 	if (adev->gfx.rlc.is_rlc_v2_1) {
3061 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3062 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3063 			gfx_v9_1_init_rlc_save_restore_list(adev);
3064 		gfx_v9_0_enable_save_restore_machine(adev);
3065 	}
3066 
3067 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3068 			      AMD_PG_SUPPORT_GFX_SMG |
3069 			      AMD_PG_SUPPORT_GFX_DMG |
3070 			      AMD_PG_SUPPORT_CP |
3071 			      AMD_PG_SUPPORT_GDS |
3072 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3073 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3074 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3075 		gfx_v9_0_init_gfx_power_gating(adev);
3076 	}
3077 }
3078 
3079 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3080 {
3081 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3082 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3083 	gfx_v9_0_wait_for_rlc_serdes(adev);
3084 }
3085 
3086 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3087 {
3088 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3089 	udelay(50);
3090 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3091 	udelay(50);
3092 }
3093 
3094 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3095 {
3096 #ifdef AMDGPU_RLC_DEBUG_RETRY
3097 	u32 rlc_ucode_ver;
3098 #endif
3099 
3100 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3101 	udelay(50);
3102 
3103 	/* carrizo do enable cp interrupt after cp inited */
3104 	if (!(adev->flags & AMD_IS_APU)) {
3105 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3106 		udelay(50);
3107 	}
3108 
3109 #ifdef AMDGPU_RLC_DEBUG_RETRY
3110 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3111 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3112 	if(rlc_ucode_ver == 0x108) {
3113 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3114 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3115 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3116 		 * default is 0x9C4 to create a 100us interval */
3117 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3118 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3119 		 * to disable the page fault retry interrupts, default is
3120 		 * 0x100 (256) */
3121 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3122 	}
3123 #endif
3124 }
3125 
3126 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3127 {
3128 	const struct rlc_firmware_header_v2_0 *hdr;
3129 	const __le32 *fw_data;
3130 	unsigned i, fw_size;
3131 
3132 	if (!adev->gfx.rlc_fw)
3133 		return -EINVAL;
3134 
3135 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3136 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3137 
3138 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3139 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3140 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3141 
3142 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3143 			RLCG_UCODE_LOADING_START_ADDRESS);
3144 	for (i = 0; i < fw_size; i++)
3145 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3146 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3147 
3148 	return 0;
3149 }
3150 
3151 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3152 {
3153 	int r;
3154 
3155 	if (amdgpu_sriov_vf(adev)) {
3156 		gfx_v9_0_init_csb(adev);
3157 		return 0;
3158 	}
3159 
3160 	adev->gfx.rlc.funcs->stop(adev);
3161 
3162 	/* disable CG */
3163 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3164 
3165 	gfx_v9_0_init_pg(adev);
3166 
3167 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3168 		/* legacy rlc firmware loading */
3169 		r = gfx_v9_0_rlc_load_microcode(adev);
3170 		if (r)
3171 			return r;
3172 	}
3173 
3174 	switch (adev->ip_versions[GC_HWIP][0]) {
3175 	case IP_VERSION(9, 2, 2):
3176 	case IP_VERSION(9, 1, 0):
3177 		if (amdgpu_lbpw == 0)
3178 			gfx_v9_0_enable_lbpw(adev, false);
3179 		else
3180 			gfx_v9_0_enable_lbpw(adev, true);
3181 		break;
3182 	case IP_VERSION(9, 4, 0):
3183 		if (amdgpu_lbpw > 0)
3184 			gfx_v9_0_enable_lbpw(adev, true);
3185 		else
3186 			gfx_v9_0_enable_lbpw(adev, false);
3187 		break;
3188 	default:
3189 		break;
3190 	}
3191 
3192 	adev->gfx.rlc.funcs->start(adev);
3193 
3194 	return 0;
3195 }
3196 
3197 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3198 {
3199 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3200 
3201 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3202 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3203 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3204 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3205 	udelay(50);
3206 }
3207 
3208 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3209 {
3210 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3211 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3212 	const struct gfx_firmware_header_v1_0 *me_hdr;
3213 	const __le32 *fw_data;
3214 	unsigned i, fw_size;
3215 
3216 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3217 		return -EINVAL;
3218 
3219 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3220 		adev->gfx.pfp_fw->data;
3221 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3222 		adev->gfx.ce_fw->data;
3223 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3224 		adev->gfx.me_fw->data;
3225 
3226 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3227 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3228 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3229 
3230 	gfx_v9_0_cp_gfx_enable(adev, false);
3231 
3232 	/* PFP */
3233 	fw_data = (const __le32 *)
3234 		(adev->gfx.pfp_fw->data +
3235 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3236 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3237 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3238 	for (i = 0; i < fw_size; i++)
3239 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3240 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3241 
3242 	/* CE */
3243 	fw_data = (const __le32 *)
3244 		(adev->gfx.ce_fw->data +
3245 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3246 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3247 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3248 	for (i = 0; i < fw_size; i++)
3249 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3250 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3251 
3252 	/* ME */
3253 	fw_data = (const __le32 *)
3254 		(adev->gfx.me_fw->data +
3255 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3256 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3257 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3258 	for (i = 0; i < fw_size; i++)
3259 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3260 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3261 
3262 	return 0;
3263 }
3264 
3265 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3266 {
3267 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3268 	const struct cs_section_def *sect = NULL;
3269 	const struct cs_extent_def *ext = NULL;
3270 	int r, i, tmp;
3271 
3272 	/* init the CP */
3273 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3274 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3275 
3276 	gfx_v9_0_cp_gfx_enable(adev, true);
3277 
3278 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3279 	if (r) {
3280 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3281 		return r;
3282 	}
3283 
3284 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3285 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3286 
3287 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3288 	amdgpu_ring_write(ring, 0x80000000);
3289 	amdgpu_ring_write(ring, 0x80000000);
3290 
3291 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3292 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3293 			if (sect->id == SECT_CONTEXT) {
3294 				amdgpu_ring_write(ring,
3295 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3296 					       ext->reg_count));
3297 				amdgpu_ring_write(ring,
3298 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3299 				for (i = 0; i < ext->reg_count; i++)
3300 					amdgpu_ring_write(ring, ext->extent[i]);
3301 			}
3302 		}
3303 	}
3304 
3305 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3306 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3307 
3308 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3309 	amdgpu_ring_write(ring, 0);
3310 
3311 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3312 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3313 	amdgpu_ring_write(ring, 0x8000);
3314 	amdgpu_ring_write(ring, 0x8000);
3315 
3316 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3317 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3318 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3319 	amdgpu_ring_write(ring, tmp);
3320 	amdgpu_ring_write(ring, 0);
3321 
3322 	amdgpu_ring_commit(ring);
3323 
3324 	return 0;
3325 }
3326 
3327 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3328 {
3329 	struct amdgpu_ring *ring;
3330 	u32 tmp;
3331 	u32 rb_bufsz;
3332 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3333 
3334 	/* Set the write pointer delay */
3335 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3336 
3337 	/* set the RB to use vmid 0 */
3338 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3339 
3340 	/* Set ring buffer size */
3341 	ring = &adev->gfx.gfx_ring[0];
3342 	rb_bufsz = order_base_2(ring->ring_size / 8);
3343 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3344 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3345 #ifdef __BIG_ENDIAN
3346 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3347 #endif
3348 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3349 
3350 	/* Initialize the ring buffer's write pointers */
3351 	ring->wptr = 0;
3352 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3353 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3354 
3355 	/* set the wb address wether it's enabled or not */
3356 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3357 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3358 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3359 
3360 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3361 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3362 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3363 
3364 	mdelay(1);
3365 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3366 
3367 	rb_addr = ring->gpu_addr >> 8;
3368 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3369 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3370 
3371 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3372 	if (ring->use_doorbell) {
3373 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3374 				    DOORBELL_OFFSET, ring->doorbell_index);
3375 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3376 				    DOORBELL_EN, 1);
3377 	} else {
3378 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3379 	}
3380 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3381 
3382 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3383 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3384 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3385 
3386 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3387 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3388 
3389 
3390 	/* start the ring */
3391 	gfx_v9_0_cp_gfx_start(adev);
3392 	ring->sched.ready = true;
3393 
3394 	return 0;
3395 }
3396 
3397 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3398 {
3399 	if (enable) {
3400 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3401 	} else {
3402 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3403 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3404 		adev->gfx.kiq.ring.sched.ready = false;
3405 	}
3406 	udelay(50);
3407 }
3408 
3409 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3410 {
3411 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3412 	const __le32 *fw_data;
3413 	unsigned i;
3414 	u32 tmp;
3415 
3416 	if (!adev->gfx.mec_fw)
3417 		return -EINVAL;
3418 
3419 	gfx_v9_0_cp_compute_enable(adev, false);
3420 
3421 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3422 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3423 
3424 	fw_data = (const __le32 *)
3425 		(adev->gfx.mec_fw->data +
3426 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3427 	tmp = 0;
3428 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3429 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3430 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3431 
3432 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3433 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3434 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3435 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3436 
3437 	/* MEC1 */
3438 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3439 			 mec_hdr->jt_offset);
3440 	for (i = 0; i < mec_hdr->jt_size; i++)
3441 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3442 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3443 
3444 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3445 			adev->gfx.mec_fw_version);
3446 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3447 
3448 	return 0;
3449 }
3450 
3451 /* KIQ functions */
3452 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3453 {
3454 	uint32_t tmp;
3455 	struct amdgpu_device *adev = ring->adev;
3456 
3457 	/* tell RLC which is KIQ queue */
3458 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3459 	tmp &= 0xffffff00;
3460 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3461 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3462 	tmp |= 0x80;
3463 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3464 }
3465 
3466 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3467 {
3468 	struct amdgpu_device *adev = ring->adev;
3469 
3470 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3471 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3472 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3473 			mqd->cp_hqd_queue_priority =
3474 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3475 		}
3476 	}
3477 }
3478 
3479 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3480 {
3481 	struct amdgpu_device *adev = ring->adev;
3482 	struct v9_mqd *mqd = ring->mqd_ptr;
3483 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3484 	uint32_t tmp;
3485 
3486 	mqd->header = 0xC0310800;
3487 	mqd->compute_pipelinestat_enable = 0x00000001;
3488 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3489 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3490 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3491 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3492 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3493 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3494 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3495 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3496 	mqd->compute_misc_reserved = 0x00000003;
3497 
3498 	mqd->dynamic_cu_mask_addr_lo =
3499 		lower_32_bits(ring->mqd_gpu_addr
3500 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3501 	mqd->dynamic_cu_mask_addr_hi =
3502 		upper_32_bits(ring->mqd_gpu_addr
3503 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3504 
3505 	eop_base_addr = ring->eop_gpu_addr >> 8;
3506 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3507 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3508 
3509 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3510 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3511 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3512 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3513 
3514 	mqd->cp_hqd_eop_control = tmp;
3515 
3516 	/* enable doorbell? */
3517 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3518 
3519 	if (ring->use_doorbell) {
3520 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3521 				    DOORBELL_OFFSET, ring->doorbell_index);
3522 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523 				    DOORBELL_EN, 1);
3524 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525 				    DOORBELL_SOURCE, 0);
3526 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3527 				    DOORBELL_HIT, 0);
3528 	} else {
3529 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3530 					 DOORBELL_EN, 0);
3531 	}
3532 
3533 	mqd->cp_hqd_pq_doorbell_control = tmp;
3534 
3535 	/* disable the queue if it's active */
3536 	ring->wptr = 0;
3537 	mqd->cp_hqd_dequeue_request = 0;
3538 	mqd->cp_hqd_pq_rptr = 0;
3539 	mqd->cp_hqd_pq_wptr_lo = 0;
3540 	mqd->cp_hqd_pq_wptr_hi = 0;
3541 
3542 	/* set the pointer to the MQD */
3543 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3544 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3545 
3546 	/* set MQD vmid to 0 */
3547 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3548 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3549 	mqd->cp_mqd_control = tmp;
3550 
3551 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3552 	hqd_gpu_addr = ring->gpu_addr >> 8;
3553 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3554 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3555 
3556 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3557 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3558 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3559 			    (order_base_2(ring->ring_size / 4) - 1));
3560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3561 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3562 #ifdef __BIG_ENDIAN
3563 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3564 #endif
3565 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3566 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3567 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3568 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3569 	mqd->cp_hqd_pq_control = tmp;
3570 
3571 	/* set the wb address whether it's enabled or not */
3572 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3573 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3574 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3575 		upper_32_bits(wb_gpu_addr) & 0xffff;
3576 
3577 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3578 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3579 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3580 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3581 
3582 	tmp = 0;
3583 	/* enable the doorbell if requested */
3584 	if (ring->use_doorbell) {
3585 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3586 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3587 				DOORBELL_OFFSET, ring->doorbell_index);
3588 
3589 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3590 					 DOORBELL_EN, 1);
3591 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3592 					 DOORBELL_SOURCE, 0);
3593 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3594 					 DOORBELL_HIT, 0);
3595 	}
3596 
3597 	mqd->cp_hqd_pq_doorbell_control = tmp;
3598 
3599 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3600 	ring->wptr = 0;
3601 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3602 
3603 	/* set the vmid for the queue */
3604 	mqd->cp_hqd_vmid = 0;
3605 
3606 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3607 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3608 	mqd->cp_hqd_persistent_state = tmp;
3609 
3610 	/* set MIN_IB_AVAIL_SIZE */
3611 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3612 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3613 	mqd->cp_hqd_ib_control = tmp;
3614 
3615 	/* set static priority for a queue/ring */
3616 	gfx_v9_0_mqd_set_priority(ring, mqd);
3617 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3618 
3619 	/* map_queues packet doesn't need activate the queue,
3620 	 * so only kiq need set this field.
3621 	 */
3622 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3623 		mqd->cp_hqd_active = 1;
3624 
3625 	return 0;
3626 }
3627 
3628 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3629 {
3630 	struct amdgpu_device *adev = ring->adev;
3631 	struct v9_mqd *mqd = ring->mqd_ptr;
3632 	int j;
3633 
3634 	/* disable wptr polling */
3635 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3636 
3637 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3638 	       mqd->cp_hqd_eop_base_addr_lo);
3639 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3640 	       mqd->cp_hqd_eop_base_addr_hi);
3641 
3642 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3643 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3644 	       mqd->cp_hqd_eop_control);
3645 
3646 	/* enable doorbell? */
3647 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3648 	       mqd->cp_hqd_pq_doorbell_control);
3649 
3650 	/* disable the queue if it's active */
3651 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3652 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3653 		for (j = 0; j < adev->usec_timeout; j++) {
3654 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3655 				break;
3656 			udelay(1);
3657 		}
3658 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3659 		       mqd->cp_hqd_dequeue_request);
3660 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3661 		       mqd->cp_hqd_pq_rptr);
3662 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3663 		       mqd->cp_hqd_pq_wptr_lo);
3664 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3665 		       mqd->cp_hqd_pq_wptr_hi);
3666 	}
3667 
3668 	/* set the pointer to the MQD */
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3670 	       mqd->cp_mqd_base_addr_lo);
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3672 	       mqd->cp_mqd_base_addr_hi);
3673 
3674 	/* set MQD vmid to 0 */
3675 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3676 	       mqd->cp_mqd_control);
3677 
3678 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3679 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3680 	       mqd->cp_hqd_pq_base_lo);
3681 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3682 	       mqd->cp_hqd_pq_base_hi);
3683 
3684 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3685 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3686 	       mqd->cp_hqd_pq_control);
3687 
3688 	/* set the wb address whether it's enabled or not */
3689 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3690 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3691 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3692 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3693 
3694 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3695 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3696 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3697 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3698 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3699 
3700 	/* enable the doorbell if requested */
3701 	if (ring->use_doorbell) {
3702 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3703 					(adev->doorbell_index.kiq * 2) << 2);
3704 		/* If GC has entered CGPG, ringing doorbell > first page
3705 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3706 		 * workaround this issue. And this change has to align with firmware
3707 		 * update.
3708 		 */
3709 		if (check_if_enlarge_doorbell_range(adev))
3710 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3711 					(adev->doorbell.size - 4));
3712 		else
3713 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3714 					(adev->doorbell_index.userqueue_end * 2) << 2);
3715 	}
3716 
3717 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3718 	       mqd->cp_hqd_pq_doorbell_control);
3719 
3720 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3721 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3722 	       mqd->cp_hqd_pq_wptr_lo);
3723 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3724 	       mqd->cp_hqd_pq_wptr_hi);
3725 
3726 	/* set the vmid for the queue */
3727 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3728 
3729 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3730 	       mqd->cp_hqd_persistent_state);
3731 
3732 	/* activate the queue */
3733 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3734 	       mqd->cp_hqd_active);
3735 
3736 	if (ring->use_doorbell)
3737 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3738 
3739 	return 0;
3740 }
3741 
3742 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3743 {
3744 	struct amdgpu_device *adev = ring->adev;
3745 	int j;
3746 
3747 	/* disable the queue if it's active */
3748 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3749 
3750 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3751 
3752 		for (j = 0; j < adev->usec_timeout; j++) {
3753 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3754 				break;
3755 			udelay(1);
3756 		}
3757 
3758 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3759 			DRM_DEBUG("KIQ dequeue request failed.\n");
3760 
3761 			/* Manual disable if dequeue request times out */
3762 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3763 		}
3764 
3765 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3766 		      0);
3767 	}
3768 
3769 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3770 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3771 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3772 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3773 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3774 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3775 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3776 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3777 
3778 	return 0;
3779 }
3780 
3781 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3782 {
3783 	struct amdgpu_device *adev = ring->adev;
3784 	struct v9_mqd *mqd = ring->mqd_ptr;
3785 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3786 	struct v9_mqd *tmp_mqd;
3787 
3788 	gfx_v9_0_kiq_setting(ring);
3789 
3790 	/* GPU could be in bad state during probe, driver trigger the reset
3791 	 * after load the SMU, in this case , the mqd is not be initialized.
3792 	 * driver need to re-init the mqd.
3793 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3794 	 */
3795 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3796 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3797 		/* for GPU_RESET case , reset MQD to a clean status */
3798 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3799 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3800 
3801 		/* reset ring buffer */
3802 		ring->wptr = 0;
3803 		amdgpu_ring_clear_ring(ring);
3804 
3805 		mutex_lock(&adev->srbm_mutex);
3806 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3807 		gfx_v9_0_kiq_init_register(ring);
3808 		soc15_grbm_select(adev, 0, 0, 0, 0);
3809 		mutex_unlock(&adev->srbm_mutex);
3810 	} else {
3811 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3812 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3813 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3814 		mutex_lock(&adev->srbm_mutex);
3815 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3816 		gfx_v9_0_mqd_init(ring);
3817 		gfx_v9_0_kiq_init_register(ring);
3818 		soc15_grbm_select(adev, 0, 0, 0, 0);
3819 		mutex_unlock(&adev->srbm_mutex);
3820 
3821 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3822 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3823 	}
3824 
3825 	return 0;
3826 }
3827 
3828 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3829 {
3830 	struct amdgpu_device *adev = ring->adev;
3831 	struct v9_mqd *mqd = ring->mqd_ptr;
3832 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3833 	struct v9_mqd *tmp_mqd;
3834 
3835 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3836 	 * is not be initialized before
3837 	 */
3838 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3839 
3840 	if (!tmp_mqd->cp_hqd_pq_control ||
3841 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3842 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3843 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3844 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3845 		mutex_lock(&adev->srbm_mutex);
3846 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3847 		gfx_v9_0_mqd_init(ring);
3848 		soc15_grbm_select(adev, 0, 0, 0, 0);
3849 		mutex_unlock(&adev->srbm_mutex);
3850 
3851 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3852 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3853 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3854 		/* reset MQD to a clean status */
3855 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3856 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3857 
3858 		/* reset ring buffer */
3859 		ring->wptr = 0;
3860 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3861 		amdgpu_ring_clear_ring(ring);
3862 	} else {
3863 		amdgpu_ring_clear_ring(ring);
3864 	}
3865 
3866 	return 0;
3867 }
3868 
3869 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3870 {
3871 	struct amdgpu_ring *ring;
3872 	int r;
3873 
3874 	ring = &adev->gfx.kiq.ring;
3875 
3876 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3877 	if (unlikely(r != 0))
3878 		return r;
3879 
3880 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3881 	if (unlikely(r != 0))
3882 		return r;
3883 
3884 	gfx_v9_0_kiq_init_queue(ring);
3885 	amdgpu_bo_kunmap(ring->mqd_obj);
3886 	ring->mqd_ptr = NULL;
3887 	amdgpu_bo_unreserve(ring->mqd_obj);
3888 	ring->sched.ready = true;
3889 	return 0;
3890 }
3891 
3892 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3893 {
3894 	struct amdgpu_ring *ring = NULL;
3895 	int r = 0, i;
3896 
3897 	gfx_v9_0_cp_compute_enable(adev, true);
3898 
3899 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3900 		ring = &adev->gfx.compute_ring[i];
3901 
3902 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3903 		if (unlikely(r != 0))
3904 			goto done;
3905 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3906 		if (!r) {
3907 			r = gfx_v9_0_kcq_init_queue(ring);
3908 			amdgpu_bo_kunmap(ring->mqd_obj);
3909 			ring->mqd_ptr = NULL;
3910 		}
3911 		amdgpu_bo_unreserve(ring->mqd_obj);
3912 		if (r)
3913 			goto done;
3914 	}
3915 
3916 	r = amdgpu_gfx_enable_kcq(adev);
3917 done:
3918 	return r;
3919 }
3920 
3921 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3922 {
3923 	int r, i;
3924 	struct amdgpu_ring *ring;
3925 
3926 	if (!(adev->flags & AMD_IS_APU))
3927 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3928 
3929 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3930 		if (adev->gfx.num_gfx_rings) {
3931 			/* legacy firmware loading */
3932 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3933 			if (r)
3934 				return r;
3935 		}
3936 
3937 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3938 		if (r)
3939 			return r;
3940 	}
3941 
3942 	r = gfx_v9_0_kiq_resume(adev);
3943 	if (r)
3944 		return r;
3945 
3946 	if (adev->gfx.num_gfx_rings) {
3947 		r = gfx_v9_0_cp_gfx_resume(adev);
3948 		if (r)
3949 			return r;
3950 	}
3951 
3952 	r = gfx_v9_0_kcq_resume(adev);
3953 	if (r)
3954 		return r;
3955 
3956 	if (adev->gfx.num_gfx_rings) {
3957 		ring = &adev->gfx.gfx_ring[0];
3958 		r = amdgpu_ring_test_helper(ring);
3959 		if (r)
3960 			return r;
3961 	}
3962 
3963 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3964 		ring = &adev->gfx.compute_ring[i];
3965 		amdgpu_ring_test_helper(ring);
3966 	}
3967 
3968 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3969 
3970 	return 0;
3971 }
3972 
3973 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3974 {
3975 	u32 tmp;
3976 
3977 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3978 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3979 		return;
3980 
3981 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3982 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3983 				adev->df.hash_status.hash_64k);
3984 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3985 				adev->df.hash_status.hash_2m);
3986 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3987 				adev->df.hash_status.hash_1g);
3988 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3989 }
3990 
3991 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3992 {
3993 	if (adev->gfx.num_gfx_rings)
3994 		gfx_v9_0_cp_gfx_enable(adev, enable);
3995 	gfx_v9_0_cp_compute_enable(adev, enable);
3996 }
3997 
3998 static int gfx_v9_0_hw_init(void *handle)
3999 {
4000 	int r;
4001 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4002 
4003 	if (!amdgpu_sriov_vf(adev))
4004 		gfx_v9_0_init_golden_registers(adev);
4005 
4006 	gfx_v9_0_constants_init(adev);
4007 
4008 	gfx_v9_0_init_tcp_config(adev);
4009 
4010 	r = adev->gfx.rlc.funcs->resume(adev);
4011 	if (r)
4012 		return r;
4013 
4014 	r = gfx_v9_0_cp_resume(adev);
4015 	if (r)
4016 		return r;
4017 
4018 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4019 		gfx_v9_4_2_set_power_brake_sequence(adev);
4020 
4021 	return r;
4022 }
4023 
4024 static int gfx_v9_0_hw_fini(void *handle)
4025 {
4026 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4027 
4028 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4029 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4030 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4031 
4032 	/* DF freeze and kcq disable will fail */
4033 	if (!amdgpu_ras_intr_triggered())
4034 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4035 		amdgpu_gfx_disable_kcq(adev);
4036 
4037 	if (amdgpu_sriov_vf(adev)) {
4038 		gfx_v9_0_cp_gfx_enable(adev, false);
4039 		/* must disable polling for SRIOV when hw finished, otherwise
4040 		 * CPC engine may still keep fetching WB address which is already
4041 		 * invalid after sw finished and trigger DMAR reading error in
4042 		 * hypervisor side.
4043 		 */
4044 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4045 		return 0;
4046 	}
4047 
4048 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4049 	 * otherwise KIQ is hanging when binding back
4050 	 */
4051 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4052 		mutex_lock(&adev->srbm_mutex);
4053 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4054 				adev->gfx.kiq.ring.pipe,
4055 				adev->gfx.kiq.ring.queue, 0);
4056 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4057 		soc15_grbm_select(adev, 0, 0, 0, 0);
4058 		mutex_unlock(&adev->srbm_mutex);
4059 	}
4060 
4061 	gfx_v9_0_cp_enable(adev, false);
4062 
4063 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4064 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4065 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4066 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4067 		return 0;
4068 	}
4069 
4070 	adev->gfx.rlc.funcs->stop(adev);
4071 	return 0;
4072 }
4073 
/*
 * Suspend entry point: delegates to gfx_v9_0_hw_fini() and returns its
 * result unchanged.
 */
4074 static int gfx_v9_0_suspend(void *handle)
4075 {
4076 	return gfx_v9_0_hw_fini(handle);
4077 }
4078 
/*
 * Resume entry point: delegates to gfx_v9_0_hw_init() and returns its
 * result unchanged.
 */
4079 static int gfx_v9_0_resume(void *handle)
4080 {
4081 	return gfx_v9_0_hw_init(handle);
4082 }
4083 
4084 static bool gfx_v9_0_is_idle(void *handle)
4085 {
4086 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4087 
4088 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4089 				GRBM_STATUS, GUI_ACTIVE))
4090 		return false;
4091 	else
4092 		return true;
4093 }
4094 
4095 static int gfx_v9_0_wait_for_idle(void *handle)
4096 {
4097 	unsigned i;
4098 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4099 
4100 	for (i = 0; i < adev->usec_timeout; i++) {
4101 		if (gfx_v9_0_is_idle(handle))
4102 			return 0;
4103 		udelay(1);
4104 	}
4105 	return -ETIMEDOUT;
4106 }
4107 
4108 static int gfx_v9_0_soft_reset(void *handle)
4109 {
4110 	u32 grbm_soft_reset = 0;
4111 	u32 tmp;
4112 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4113 
4114 	/* GRBM_STATUS */
4115 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4116 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4117 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4118 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4119 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4120 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4121 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4122 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4123 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4124 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4125 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4126 	}
4127 
4128 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4129 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4130 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4131 	}
4132 
4133 	/* GRBM_STATUS2 */
4134 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4135 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4136 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4137 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4138 
4139 
4140 	if (grbm_soft_reset) {
4141 		/* stop the rlc */
4142 		adev->gfx.rlc.funcs->stop(adev);
4143 
4144 		if (adev->gfx.num_gfx_rings)
4145 			/* Disable GFX parsing/prefetching */
4146 			gfx_v9_0_cp_gfx_enable(adev, false);
4147 
4148 		/* Disable MEC parsing/prefetching */
4149 		gfx_v9_0_cp_compute_enable(adev, false);
4150 
4151 		if (grbm_soft_reset) {
4152 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4153 			tmp |= grbm_soft_reset;
4154 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4155 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4156 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4157 
4158 			udelay(50);
4159 
4160 			tmp &= ~grbm_soft_reset;
4161 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4162 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4163 		}
4164 
4165 		/* Wait a little for things to settle down */
4166 		udelay(50);
4167 	}
4168 	return 0;
4169 }
4170 
4171 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4172 {
4173 	signed long r, cnt = 0;
4174 	unsigned long flags;
4175 	uint32_t seq, reg_val_offs = 0;
4176 	uint64_t value = 0;
4177 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4178 	struct amdgpu_ring *ring = &kiq->ring;
4179 
4180 	BUG_ON(!ring->funcs->emit_rreg);
4181 
4182 	spin_lock_irqsave(&kiq->ring_lock, flags);
4183 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4184 		pr_err("critical bug! too many kiq readers\n");
4185 		goto failed_unlock;
4186 	}
4187 	amdgpu_ring_alloc(ring, 32);
4188 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4189 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4190 				(5 << 8) |	/* dst: memory */
4191 				(1 << 16) |	/* count sel */
4192 				(1 << 20));	/* write confirm */
4193 	amdgpu_ring_write(ring, 0);
4194 	amdgpu_ring_write(ring, 0);
4195 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4196 				reg_val_offs * 4));
4197 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4198 				reg_val_offs * 4));
4199 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4200 	if (r)
4201 		goto failed_undo;
4202 
4203 	amdgpu_ring_commit(ring);
4204 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4205 
4206 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4207 
4208 	/* don't wait anymore for gpu reset case because this way may
4209 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4210 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4211 	 * never return if we keep waiting in virt_kiq_rreg, which cause
4212 	 * gpu_recover() hang there.
4213 	 *
4214 	 * also don't wait anymore for IRQ context
4215 	 * */
4216 	if (r < 1 && (amdgpu_in_reset(adev)))
4217 		goto failed_kiq_read;
4218 
4219 	might_sleep();
4220 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4221 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4222 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4223 	}
4224 
4225 	if (cnt > MAX_KIQ_REG_TRY)
4226 		goto failed_kiq_read;
4227 
4228 	mb();
4229 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4230 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4231 	amdgpu_device_wb_free(adev, reg_val_offs);
4232 	return value;
4233 
4234 failed_undo:
4235 	amdgpu_ring_undo(ring);
4236 failed_unlock:
4237 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4238 failed_kiq_read:
4239 	if (reg_val_offs)
4240 		amdgpu_device_wb_free(adev, reg_val_offs);
4241 	pr_err("failed to read gpu clock\n");
4242 	return ~0;
4243 }
4244 
4245 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4246 {
4247 	uint64_t clock, clock_lo, clock_hi, hi_check;
4248 
4249 	switch (adev->ip_versions[GC_HWIP][0]) {
4250 	case IP_VERSION(9, 3, 0):
4251 		preempt_disable();
4252 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4253 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4254 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4255 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4256 		 * roughly every 42 seconds.
4257 		 */
4258 		if (hi_check != clock_hi) {
4259 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4260 			clock_hi = hi_check;
4261 		}
4262 		preempt_enable();
4263 		clock = clock_lo | (clock_hi << 32ULL);
4264 		break;
4265 	default:
4266 		amdgpu_gfx_off_ctrl(adev, false);
4267 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4268 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4269 			clock = gfx_v9_0_kiq_read_clock(adev);
4270 		} else {
4271 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4272 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4273 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4274 		}
4275 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4276 		amdgpu_gfx_off_ctrl(adev, true);
4277 		break;
4278 	}
4279 	return clock;
4280 }
4281 
4282 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4283 					  uint32_t vmid,
4284 					  uint32_t gds_base, uint32_t gds_size,
4285 					  uint32_t gws_base, uint32_t gws_size,
4286 					  uint32_t oa_base, uint32_t oa_size)
4287 {
4288 	struct amdgpu_device *adev = ring->adev;
4289 
4290 	/* GDS Base */
4291 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4292 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4293 				   gds_base);
4294 
4295 	/* GDS Size */
4296 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4297 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4298 				   gds_size);
4299 
4300 	/* GWS */
4301 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4302 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4303 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4304 
4305 	/* OA */
4306 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4307 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4308 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4309 }
4310 
/*
 * Raw dwords of the compute shader that gfx_v9_0_do_edc_gpr_workarounds()
 * copies into its indirect buffer and dispatches for the VGPR pass on every
 * GC IP version other than 9.4.1.  Opaque binary data: do not reformat or
 * edit by hand.
 */
4311 static const u32 vgpr_init_compute_shader[] =
4312 {
4313 	0xb07c0000, 0xbe8000ff,
4314 	0x000000f8, 0xbf110800,
4315 	0x7e000280, 0x7e020280,
4316 	0x7e040280, 0x7e060280,
4317 	0x7e080280, 0x7e0a0280,
4318 	0x7e0c0280, 0x7e0e0280,
4319 	0x80808800, 0xbe803200,
4320 	0xbf84fff5, 0xbf9c0000,
4321 	0xd28c0001, 0x0001007f,
4322 	0xd28d0001, 0x0002027e,
4323 	0x10020288, 0xb8810904,
4324 	0xb7814000, 0xd1196a01,
4325 	0x00000301, 0xbe800087,
4326 	0xbefc00c1, 0xd89c4000,
4327 	0x00020201, 0xd89cc080,
4328 	0x00040401, 0x320202ff,
4329 	0x00000800, 0x80808100,
4330 	0xbf84fff8, 0x7e020280,
4331 	0xbf810000, 0x00000000,
4332 };
4333 
/*
 * Raw dwords of the compute shader that gfx_v9_0_do_edc_gpr_workarounds()
 * copies into its indirect buffer and dispatches for both SGPR passes
 * (with sgpr1_init_regs and sgpr2_init_regs respectively).
 */
4334 static const u32 sgpr_init_compute_shader[] =
4335 {
4336 	0xb07c0000, 0xbe8000ff,
4337 	0x0000005f, 0xbee50080,
4338 	0xbe812c65, 0xbe822c65,
4339 	0xbe832c65, 0xbe842c65,
4340 	0xbe852c65, 0xb77c0005,
4341 	0x80808500, 0xbf84fff8,
4342 	0xbe800080, 0xbf810000,
4343 };
4344 
/*
 * Raw dwords of the VGPR-pass compute shader that
 * gfx_v9_0_do_edc_gpr_workarounds() selects when the GC IP version is 9.4.1.
 * The first 256 dword pairs follow a regular pattern (0xd3d940NN, 0x18000080
 * for NN = 0x00..0xff); the remainder is the shader tail.  Opaque binary
 * data: do not reformat or edit by hand.
 */
4345 static const u32 vgpr_init_compute_shader_arcturus[] = {
4346 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4347 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4348 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4349 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4350 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4351 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4352 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4353 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4354 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4355 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4356 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4357 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4358 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4359 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4360 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4361 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4362 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4363 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4364 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4365 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4366 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4367 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4368 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4369 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4370 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4371 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4372 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4373 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4374 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4375 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4376 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4377 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4378 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4379 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4380 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4381 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4382 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4383 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4384 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4385 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4386 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4387 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4388 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4389 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4390 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4391 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4392 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4393 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4394 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4395 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4396 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4397 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4398 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4399 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4400 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4401 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4402 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4403 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4404 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4405 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4406 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4407 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4408 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4409 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4410 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4411 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4412 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4413 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4414 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4415 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4416 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4417 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4418 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4419 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4420 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4421 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4422 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4423 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4424 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4425 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4426 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4427 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4428 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4429 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4430 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4431 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4432 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4433 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4434 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4435 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4436 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4437 	0xbf84fff8, 0xbf810000,
4438 };
4439 
4440 /* When the register arrays below are changed, please update gpr_reg_size
4441   (and sec_ded_counter_reg_size, where used) in gfx_v9_0_do_edc_gpr_workarounds()
4442   so that all gfx9 ASICs are covered. */
/*
 * Register state for the VGPR pass of gfx_v9_0_do_edc_gpr_workarounds() on
 * GC IP versions other than 9.4.1.  Each entry pairs a register with the
 * value to program; the function emits the first
 * (max_shader_engines + 6) entries as SET_SH_REG packets, so the six common
 * registers must stay ahead of the per-SE STATIC_THREAD_MGMT entries.
 */
4443 static const struct soc15_reg_entry vgpr_init_regs[] = {
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4458 };
4459 
/*
 * Register state for the VGPR pass of gfx_v9_0_do_edc_gpr_workarounds() when
 * the GC IP version is 9.4.1.  Same layout as vgpr_init_regs; the only
 * differing value is COMPUTE_PGM_RSRC1 (0xbf here versus 0x3f there).
 */
4460 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4475 };
4476 
/*
 * Register state for the first SGPR pass of
 * gfx_v9_0_do_edc_gpr_workarounds().  The per-SE STATIC_THREAD_MGMT values
 * enable bits 0-7 only (0x000000ff); sgpr2_init_regs enables bits 8-15.
 * NOTE(review): presumably these bits select which CUs run the shader, so
 * the two SGPR passes together cover 16 CUs per SE -- confirm against the
 * register spec.
 */
4477 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4492 };
4493 
/*
 * Register state for the second SGPR pass of
 * gfx_v9_0_do_edc_gpr_workarounds().  Identical to sgpr1_init_regs except
 * that the per-SE STATIC_THREAD_MGMT values enable bits 8-15 (0x0000ff00)
 * instead of bits 0-7.
 */
4494 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4500    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4509 };
4510 
/*
 * Table of the GFX EDC counter registers.  Each entry gives the register,
 * a value of 0 and two trailing counts.
 * NOTE(review): the two counts are presumably the number of shader engines
 * and the number of instances to iterate over when accessing the register
 * (se_num / instance in struct soc15_reg_entry) -- confirm against the
 * struct definition and the users of this table, which are outside the part
 * of the file reviewed here.
 */
4511 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4512    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4513    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4514    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4515    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4516    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4517    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4518    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4519    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4520    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4521    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4522    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4523    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4524    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4525    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4526    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4527    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4528    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4529    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4530    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4531    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4532    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4533    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4534    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4535    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4536    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4537    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4538    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4539    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4540    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4541    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4542    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4543    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4544    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4545 };
4546 
4547 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4548 {
4549 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4550 	int i, r;
4551 
4552 	/* only support when RAS is enabled */
4553 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4554 		return 0;
4555 
4556 	r = amdgpu_ring_alloc(ring, 7);
4557 	if (r) {
4558 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4559 			ring->name, r);
4560 		return r;
4561 	}
4562 
4563 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4564 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4565 
4566 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4567 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4568 				PACKET3_DMA_DATA_DST_SEL(1) |
4569 				PACKET3_DMA_DATA_SRC_SEL(2) |
4570 				PACKET3_DMA_DATA_ENGINE(0)));
4571 	amdgpu_ring_write(ring, 0);
4572 	amdgpu_ring_write(ring, 0);
4573 	amdgpu_ring_write(ring, 0);
4574 	amdgpu_ring_write(ring, 0);
4575 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4576 				adev->gds.gds_size);
4577 
4578 	amdgpu_ring_commit(ring);
4579 
4580 	for (i = 0; i < adev->usec_timeout; i++) {
4581 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4582 			break;
4583 		udelay(1);
4584 	}
4585 
4586 	if (i >= adev->usec_timeout)
4587 		r = -ETIMEDOUT;
4588 
4589 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4590 
4591 	return r;
4592 }
4593 
4594 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4595 {
4596 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4597 	struct amdgpu_ib ib;
4598 	struct dma_fence *f = NULL;
4599 	int r, i;
4600 	unsigned total_size, vgpr_offset, sgpr_offset;
4601 	u64 gpu_addr;
4602 
4603 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4604 						adev->gfx.config.max_cu_per_sh *
4605 						adev->gfx.config.max_sh_per_se;
4606 	int sgpr_work_group_size = 5;
4607 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4608 	int vgpr_init_shader_size;
4609 	const u32 *vgpr_init_shader_ptr;
4610 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4611 
4612 	/* only support when RAS is enabled */
4613 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4614 		return 0;
4615 
4616 	/* bail if the compute ring is not ready */
4617 	if (!ring->sched.ready)
4618 		return 0;
4619 
4620 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4621 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4622 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4623 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4624 	} else {
4625 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4626 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4627 		vgpr_init_regs_ptr = vgpr_init_regs;
4628 	}
4629 
4630 	total_size =
4631 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4632 	total_size +=
4633 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4634 	total_size +=
4635 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4636 	total_size = ALIGN(total_size, 256);
4637 	vgpr_offset = total_size;
4638 	total_size += ALIGN(vgpr_init_shader_size, 256);
4639 	sgpr_offset = total_size;
4640 	total_size += sizeof(sgpr_init_compute_shader);
4641 
4642 	/* allocate an indirect buffer to put the commands in */
4643 	memset(&ib, 0, sizeof(ib));
4644 	r = amdgpu_ib_get(adev, NULL, total_size,
4645 					AMDGPU_IB_POOL_DIRECT, &ib);
4646 	if (r) {
4647 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4648 		return r;
4649 	}
4650 
4651 	/* load the compute shaders */
4652 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4653 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4654 
4655 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4656 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4657 
4658 	/* init the ib length to 0 */
4659 	ib.length_dw = 0;
4660 
4661 	/* VGPR */
4662 	/* write the register state for the compute dispatch */
4663 	for (i = 0; i < gpr_reg_size; i++) {
4664 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4665 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4666 								- PACKET3_SET_SH_REG_START;
4667 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4668 	}
4669 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4670 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4671 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4672 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4673 							- PACKET3_SET_SH_REG_START;
4674 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4675 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4676 
4677 	/* write dispatch packet */
4678 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4679 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4680 	ib.ptr[ib.length_dw++] = 1; /* y */
4681 	ib.ptr[ib.length_dw++] = 1; /* z */
4682 	ib.ptr[ib.length_dw++] =
4683 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4684 
4685 	/* write CS partial flush packet */
4686 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4687 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4688 
4689 	/* SGPR1 */
4690 	/* write the register state for the compute dispatch */
4691 	for (i = 0; i < gpr_reg_size; i++) {
4692 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4693 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4694 								- PACKET3_SET_SH_REG_START;
4695 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4696 	}
4697 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4698 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4699 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4700 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4701 							- PACKET3_SET_SH_REG_START;
4702 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4703 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4704 
4705 	/* write dispatch packet */
4706 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4707 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4708 	ib.ptr[ib.length_dw++] = 1; /* y */
4709 	ib.ptr[ib.length_dw++] = 1; /* z */
4710 	ib.ptr[ib.length_dw++] =
4711 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4712 
4713 	/* write CS partial flush packet */
4714 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4715 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4716 
4717 	/* SGPR2 */
4718 	/* write the register state for the compute dispatch */
4719 	for (i = 0; i < gpr_reg_size; i++) {
4720 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4721 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4722 								- PACKET3_SET_SH_REG_START;
4723 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4724 	}
4725 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4726 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4727 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4728 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4729 							- PACKET3_SET_SH_REG_START;
4730 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4731 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4732 
4733 	/* write dispatch packet */
4734 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4735 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4736 	ib.ptr[ib.length_dw++] = 1; /* y */
4737 	ib.ptr[ib.length_dw++] = 1; /* z */
4738 	ib.ptr[ib.length_dw++] =
4739 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4740 
4741 	/* write CS partial flush packet */
4742 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4743 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4744 
4745 	/* shedule the ib on the ring */
4746 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4747 	if (r) {
4748 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4749 		goto fail;
4750 	}
4751 
4752 	/* wait for the GPU to finish processing the IB */
4753 	r = dma_fence_wait(f, false);
4754 	if (r) {
4755 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4756 		goto fail;
4757 	}
4758 
4759 fail:
4760 	amdgpu_ib_free(adev, &ib, NULL);
4761 	dma_fence_put(f);
4762 
4763 	return r;
4764 }
4765 
4766 static int gfx_v9_0_early_init(void *handle)
4767 {
4768 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4769 
4770 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4771 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4772 		adev->gfx.num_gfx_rings = 0;
4773 	else
4774 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4775 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4776 					  AMDGPU_MAX_COMPUTE_RINGS);
4777 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4778 	gfx_v9_0_set_ring_funcs(adev);
4779 	gfx_v9_0_set_irq_funcs(adev);
4780 	gfx_v9_0_set_gds_init(adev);
4781 	gfx_v9_0_set_rlc_funcs(adev);
4782 
4783 	return 0;
4784 }
4785 
4786 static int gfx_v9_0_ecc_late_init(void *handle)
4787 {
4788 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4789 	int r;
4790 
4791 	/*
4792 	 * Temp workaround to fix the issue that CP firmware fails to
4793 	 * update read pointer when CPDMA is writing clearing operation
4794 	 * to GDS in suspend/resume sequence on several cards. So just
4795 	 * limit this operation in cold boot sequence.
4796 	 */
4797 	if ((!adev->in_suspend) &&
4798 	    (adev->gds.gds_size)) {
4799 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4800 		if (r)
4801 			return r;
4802 	}
4803 
4804 	/* requires IBs so do in late init after IB pool is initialized */
4805 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4806 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4807 	else
4808 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4809 
4810 	if (r)
4811 		return r;
4812 
4813 	if (adev->gfx.ras_funcs &&
4814 	    adev->gfx.ras_funcs->ras_late_init) {
4815 		r = adev->gfx.ras_funcs->ras_late_init(adev);
4816 		if (r)
4817 			return r;
4818 	}
4819 
4820 	if (adev->gfx.ras_funcs &&
4821 	    adev->gfx.ras_funcs->enable_watchdog_timer)
4822 		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4823 
4824 	return 0;
4825 }
4826 
4827 static int gfx_v9_0_late_init(void *handle)
4828 {
4829 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4830 	int r;
4831 
4832 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4833 	if (r)
4834 		return r;
4835 
4836 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4837 	if (r)
4838 		return r;
4839 
4840 	r = gfx_v9_0_ecc_late_init(handle);
4841 	if (r)
4842 		return r;
4843 
4844 	return 0;
4845 }
4846 
4847 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4848 {
4849 	uint32_t rlc_setting;
4850 
4851 	/* if RLC is not enabled, do nothing */
4852 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4853 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4854 		return false;
4855 
4856 	return true;
4857 }
4858 
4859 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4860 {
4861 	uint32_t data;
4862 	unsigned i;
4863 
4864 	data = RLC_SAFE_MODE__CMD_MASK;
4865 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4866 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4867 
4868 	/* wait for RLC_SAFE_MODE */
4869 	for (i = 0; i < adev->usec_timeout; i++) {
4870 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4871 			break;
4872 		udelay(1);
4873 	}
4874 }
4875 
4876 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4877 {
4878 	uint32_t data;
4879 
4880 	data = RLC_SAFE_MODE__CMD_MASK;
4881 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4882 }
4883 
4884 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4885 						bool enable)
4886 {
4887 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4888 
4889 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4890 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4891 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4892 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4893 	} else {
4894 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4895 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4896 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4897 	}
4898 
4899 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4900 }
4901 
4902 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4903 						bool enable)
4904 {
4905 	/* TODO: double check if we need to perform under safe mode */
4906 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4907 
4908 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4909 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4910 	else
4911 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4912 
4913 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4914 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4915 	else
4916 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4917 
4918 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4919 }
4920 
4921 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4922 						      bool enable)
4923 {
4924 	uint32_t data, def;
4925 
4926 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4927 
4928 	/* It is disabled by HW by default */
4929 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4930 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4931 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4932 
4933 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4934 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4935 
4936 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4937 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4938 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4939 
4940 		/* only for Vega10 & Raven1 */
4941 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4942 
4943 		if (def != data)
4944 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4945 
4946 		/* MGLS is a global flag to control all MGLS in GFX */
4947 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4948 			/* 2 - RLC memory Light sleep */
4949 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4950 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4951 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4952 				if (def != data)
4953 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4954 			}
4955 			/* 3 - CP memory Light sleep */
4956 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4957 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4958 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4959 				if (def != data)
4960 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4961 			}
4962 		}
4963 	} else {
4964 		/* 1 - MGCG_OVERRIDE */
4965 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4966 
4967 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4968 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4969 
4970 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4971 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4972 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4973 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4974 
4975 		if (def != data)
4976 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4977 
4978 		/* 2 - disable MGLS in RLC */
4979 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4980 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4981 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4982 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4983 		}
4984 
4985 		/* 3 - disable MGLS in CP */
4986 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4987 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4988 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4989 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4990 		}
4991 	}
4992 
4993 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4994 }
4995 
4996 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4997 					   bool enable)
4998 {
4999 	uint32_t data, def;
5000 
5001 	if (!adev->gfx.num_gfx_rings)
5002 		return;
5003 
5004 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5005 
5006 	/* Enable 3D CGCG/CGLS */
5007 	if (enable) {
5008 		/* write cmd to clear cgcg/cgls ov */
5009 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5010 		/* unset CGCG override */
5011 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5012 		/* update CGCG and CGLS override bits */
5013 		if (def != data)
5014 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5015 
5016 		/* enable 3Dcgcg FSM(0x0000363f) */
5017 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5018 
5019 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5020 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5021 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5022 		else
5023 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5024 
5025 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5026 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5027 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5028 		if (def != data)
5029 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5030 
5031 		/* set IDLE_POLL_COUNT(0x00900100) */
5032 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5033 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5034 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5035 		if (def != data)
5036 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5037 	} else {
5038 		/* Disable CGCG/CGLS */
5039 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5040 		/* disable cgcg, cgls should be disabled */
5041 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5042 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5043 		/* disable cgcg and cgls in FSM */
5044 		if (def != data)
5045 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5046 	}
5047 
5048 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5049 }
5050 
5051 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5052 						      bool enable)
5053 {
5054 	uint32_t def, data;
5055 
5056 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5057 
5058 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5059 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5060 		/* unset CGCG override */
5061 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5062 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5063 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5064 		else
5065 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5066 		/* update CGCG and CGLS override bits */
5067 		if (def != data)
5068 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5069 
5070 		/* enable cgcg FSM(0x0000363F) */
5071 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5072 
5073 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5074 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5075 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5076 		else
5077 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5078 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5079 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5080 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5081 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5082 		if (def != data)
5083 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5084 
5085 		/* set IDLE_POLL_COUNT(0x00900100) */
5086 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5087 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5088 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5089 		if (def != data)
5090 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5091 	} else {
5092 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5093 		/* reset CGCG/CGLS bits */
5094 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5095 		/* disable cgcg and cgls in FSM */
5096 		if (def != data)
5097 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5098 	}
5099 
5100 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5101 }
5102 
5103 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5104 					    bool enable)
5105 {
5106 	if (enable) {
5107 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5108 		 * ===  MGCG + MGLS ===
5109 		 */
5110 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5111 		/* ===  CGCG /CGLS for GFX 3D Only === */
5112 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5113 		/* ===  CGCG + CGLS === */
5114 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5115 	} else {
5116 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5117 		 * ===  CGCG + CGLS ===
5118 		 */
5119 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5120 		/* ===  CGCG /CGLS for GFX 3D Only === */
5121 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5122 		/* ===  MGCG + MGLS === */
5123 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5124 	}
5125 	return 0;
5126 }
5127 
5128 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5129 {
5130 	u32 reg, data;
5131 
5132 	amdgpu_gfx_off_ctrl(adev, false);
5133 
5134 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5135 	if (amdgpu_sriov_is_pp_one_vf(adev))
5136 		data = RREG32_NO_KIQ(reg);
5137 	else
5138 		data = RREG32(reg);
5139 
5140 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5141 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5142 
5143 	if (amdgpu_sriov_is_pp_one_vf(adev))
5144 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5145 	else
5146 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5147 
5148 	amdgpu_gfx_off_ctrl(adev, true);
5149 }
5150 
5151 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5152 					uint32_t offset,
5153 					struct soc15_reg_rlcg *entries, int arr_size)
5154 {
5155 	int i;
5156 	uint32_t reg;
5157 
5158 	if (!entries)
5159 		return false;
5160 
5161 	for (i = 0; i < arr_size; i++) {
5162 		const struct soc15_reg_rlcg *entry;
5163 
5164 		entry = &entries[i];
5165 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5166 		if (offset == reg)
5167 			return true;
5168 	}
5169 
5170 	return false;
5171 }
5172 
5173 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5174 {
5175 	return gfx_v9_0_check_rlcg_range(adev, offset,
5176 					(void *)rlcg_access_gc_9_0,
5177 					ARRAY_SIZE(rlcg_access_gc_9_0));
5178 }
5179 
5180 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5181 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5182 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5183 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5184 	.init = gfx_v9_0_rlc_init,
5185 	.get_csb_size = gfx_v9_0_get_csb_size,
5186 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5187 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5188 	.resume = gfx_v9_0_rlc_resume,
5189 	.stop = gfx_v9_0_rlc_stop,
5190 	.reset = gfx_v9_0_rlc_reset,
5191 	.start = gfx_v9_0_rlc_start,
5192 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5193 	.sriov_wreg = gfx_v9_0_sriov_wreg,
5194 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5195 };
5196 
5197 static int gfx_v9_0_set_powergating_state(void *handle,
5198 					  enum amd_powergating_state state)
5199 {
5200 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5201 	bool enable = (state == AMD_PG_STATE_GATE);
5202 
5203 	switch (adev->ip_versions[GC_HWIP][0]) {
5204 	case IP_VERSION(9, 2, 2):
5205 	case IP_VERSION(9, 1, 0):
5206 	case IP_VERSION(9, 3, 0):
5207 		if (!enable)
5208 			amdgpu_gfx_off_ctrl(adev, false);
5209 
5210 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5211 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5212 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5213 		} else {
5214 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5215 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5216 		}
5217 
5218 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5219 			gfx_v9_0_enable_cp_power_gating(adev, true);
5220 		else
5221 			gfx_v9_0_enable_cp_power_gating(adev, false);
5222 
5223 		/* update gfx cgpg state */
5224 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5225 
5226 		/* update mgcg state */
5227 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5228 
5229 		if (enable)
5230 			amdgpu_gfx_off_ctrl(adev, true);
5231 		break;
5232 	case IP_VERSION(9, 2, 1):
5233 		amdgpu_gfx_off_ctrl(adev, enable);
5234 		break;
5235 	default:
5236 		break;
5237 	}
5238 
5239 	return 0;
5240 }
5241 
5242 static int gfx_v9_0_set_clockgating_state(void *handle,
5243 					  enum amd_clockgating_state state)
5244 {
5245 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5246 
5247 	if (amdgpu_sriov_vf(adev))
5248 		return 0;
5249 
5250 	switch (adev->ip_versions[GC_HWIP][0]) {
5251 	case IP_VERSION(9, 0, 1):
5252 	case IP_VERSION(9, 2, 1):
5253 	case IP_VERSION(9, 4, 0):
5254 	case IP_VERSION(9, 2, 2):
5255 	case IP_VERSION(9, 1, 0):
5256 	case IP_VERSION(9, 4, 1):
5257 	case IP_VERSION(9, 3, 0):
5258 	case IP_VERSION(9, 4, 2):
5259 		gfx_v9_0_update_gfx_clock_gating(adev,
5260 						 state == AMD_CG_STATE_GATE);
5261 		break;
5262 	default:
5263 		break;
5264 	}
5265 	return 0;
5266 }
5267 
5268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5269 {
5270 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5271 	int data;
5272 
5273 	if (amdgpu_sriov_vf(adev))
5274 		*flags = 0;
5275 
5276 	/* AMD_CG_SUPPORT_GFX_MGCG */
5277 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5278 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5279 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5280 
5281 	/* AMD_CG_SUPPORT_GFX_CGCG */
5282 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5283 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5284 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5285 
5286 	/* AMD_CG_SUPPORT_GFX_CGLS */
5287 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5288 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5289 
5290 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5291 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5292 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5293 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5294 
5295 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5296 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5297 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5298 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5299 
5300 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5301 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5302 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5303 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5304 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5305 
5306 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5307 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5308 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5309 	}
5310 }
5311 
5312 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5313 {
5314 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
5315 }
5316 
5317 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5318 {
5319 	struct amdgpu_device *adev = ring->adev;
5320 	u64 wptr;
5321 
5322 	/* XXX check if swapping is necessary on BE */
5323 	if (ring->use_doorbell) {
5324 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5325 	} else {
5326 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5327 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5328 	}
5329 
5330 	return wptr;
5331 }
5332 
5333 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5334 {
5335 	struct amdgpu_device *adev = ring->adev;
5336 
5337 	if (ring->use_doorbell) {
5338 		/* XXX check if swapping is necessary on BE */
5339 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5340 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5341 	} else {
5342 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5343 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5344 	}
5345 }
5346 
5347 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5348 {
5349 	struct amdgpu_device *adev = ring->adev;
5350 	u32 ref_and_mask, reg_mem_engine;
5351 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5352 
5353 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5354 		switch (ring->me) {
5355 		case 1:
5356 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5357 			break;
5358 		case 2:
5359 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5360 			break;
5361 		default:
5362 			return;
5363 		}
5364 		reg_mem_engine = 0;
5365 	} else {
5366 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5367 		reg_mem_engine = 1; /* pfp */
5368 	}
5369 
5370 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5371 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5372 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5373 			      ref_and_mask, ref_and_mask, 0x20);
5374 }
5375 
5376 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5377 					struct amdgpu_job *job,
5378 					struct amdgpu_ib *ib,
5379 					uint32_t flags)
5380 {
5381 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5382 	u32 header, control = 0;
5383 
5384 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5385 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5386 	else
5387 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5388 
5389 	control |= ib->length_dw | (vmid << 24);
5390 
5391 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5392 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5393 
5394 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5395 			gfx_v9_0_ring_emit_de_meta(ring);
5396 	}
5397 
5398 	amdgpu_ring_write(ring, header);
5399 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5400 	amdgpu_ring_write(ring,
5401 #ifdef __BIG_ENDIAN
5402 		(2 << 0) |
5403 #endif
5404 		lower_32_bits(ib->gpu_addr));
5405 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5406 	amdgpu_ring_write(ring, control);
5407 }
5408 
5409 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5410 					  struct amdgpu_job *job,
5411 					  struct amdgpu_ib *ib,
5412 					  uint32_t flags)
5413 {
5414 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5415 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5416 
5417 	/* Currently, there is a high possibility to get wave ID mismatch
5418 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5419 	 * different wave IDs than the GDS expects. This situation happens
5420 	 * randomly when at least 5 compute pipes use GDS ordered append.
5421 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5422 	 * Those are probably bugs somewhere else in the kernel driver.
5423 	 *
5424 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5425 	 * GDS to 0 for this ring (me/pipe).
5426 	 */
5427 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5428 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5429 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5430 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5431 	}
5432 
5433 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5434 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5435 	amdgpu_ring_write(ring,
5436 #ifdef __BIG_ENDIAN
5437 				(2 << 0) |
5438 #endif
5439 				lower_32_bits(ib->gpu_addr));
5440 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5441 	amdgpu_ring_write(ring, control);
5442 }
5443 
5444 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5445 				     u64 seq, unsigned flags)
5446 {
5447 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5448 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5449 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5450 
5451 	/* RELEASE_MEM - flush caches, send int */
5452 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5453 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5454 					       EOP_TC_NC_ACTION_EN) :
5455 					      (EOP_TCL1_ACTION_EN |
5456 					       EOP_TC_ACTION_EN |
5457 					       EOP_TC_WB_ACTION_EN |
5458 					       EOP_TC_MD_ACTION_EN)) |
5459 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5460 				 EVENT_INDEX(5)));
5461 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5462 
5463 	/*
5464 	 * the address should be Qword aligned if 64bit write, Dword
5465 	 * aligned if only send 32bit data low (discard data high)
5466 	 */
5467 	if (write64bit)
5468 		BUG_ON(addr & 0x7);
5469 	else
5470 		BUG_ON(addr & 0x3);
5471 	amdgpu_ring_write(ring, lower_32_bits(addr));
5472 	amdgpu_ring_write(ring, upper_32_bits(addr));
5473 	amdgpu_ring_write(ring, lower_32_bits(seq));
5474 	amdgpu_ring_write(ring, upper_32_bits(seq));
5475 	amdgpu_ring_write(ring, 0);
5476 }
5477 
5478 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5479 {
5480 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5481 	uint32_t seq = ring->fence_drv.sync_seq;
5482 	uint64_t addr = ring->fence_drv.gpu_addr;
5483 
5484 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5485 			      lower_32_bits(addr), upper_32_bits(addr),
5486 			      seq, 0xffffffff, 4);
5487 }
5488 
5489 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5490 					unsigned vmid, uint64_t pd_addr)
5491 {
5492 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5493 
5494 	/* compute doesn't have PFP */
5495 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5496 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5497 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5498 		amdgpu_ring_write(ring, 0x0);
5499 	}
5500 }
5501 
5502 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5503 {
5504 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5505 }
5506 
5507 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5508 {
5509 	u64 wptr;
5510 
5511 	/* XXX check if swapping is necessary on BE */
5512 	if (ring->use_doorbell)
5513 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5514 	else
5515 		BUG();
5516 	return wptr;
5517 }
5518 
5519 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5520 {
5521 	struct amdgpu_device *adev = ring->adev;
5522 
5523 	/* XXX check if swapping is necessary on BE */
5524 	if (ring->use_doorbell) {
5525 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5526 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5527 	} else{
5528 		BUG(); /* only DOORBELL method supported on gfx9 now */
5529 	}
5530 }
5531 
5532 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5533 					 u64 seq, unsigned int flags)
5534 {
5535 	struct amdgpu_device *adev = ring->adev;
5536 
5537 	/* we only allocate 32bit for each seq wb address */
5538 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5539 
5540 	/* write fence seq to the "addr" */
5541 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5542 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5543 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5544 	amdgpu_ring_write(ring, lower_32_bits(addr));
5545 	amdgpu_ring_write(ring, upper_32_bits(addr));
5546 	amdgpu_ring_write(ring, lower_32_bits(seq));
5547 
5548 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5549 		/* set register to trigger INT */
5550 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5551 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5552 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5553 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5554 		amdgpu_ring_write(ring, 0);
5555 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5556 	}
5557 }
5558 
5559 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5560 {
5561 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5562 	amdgpu_ring_write(ring, 0);
5563 }
5564 
5565 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5566 {
5567 	struct v9_ce_ib_state ce_payload = {0};
5568 	uint64_t csa_addr;
5569 	int cnt;
5570 
5571 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5572 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5573 
5574 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5575 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5576 				 WRITE_DATA_DST_SEL(8) |
5577 				 WR_CONFIRM) |
5578 				 WRITE_DATA_CACHE_POLICY(0));
5579 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5580 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5581 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5582 }
5583 
5584 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5585 {
5586 	struct v9_de_ib_state de_payload = {0};
5587 	uint64_t csa_addr, gds_addr;
5588 	int cnt;
5589 
5590 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5591 	gds_addr = csa_addr + 4096;
5592 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5593 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5594 
5595 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5596 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5597 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5598 				 WRITE_DATA_DST_SEL(8) |
5599 				 WR_CONFIRM) |
5600 				 WRITE_DATA_CACHE_POLICY(0));
5601 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5602 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5603 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5604 }
5605 
5606 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5607 				   bool secure)
5608 {
5609 	uint32_t v = secure ? FRAME_TMZ : 0;
5610 
5611 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5612 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5613 }
5614 
5615 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5616 {
5617 	uint32_t dw2 = 0;
5618 
5619 	if (amdgpu_sriov_vf(ring->adev))
5620 		gfx_v9_0_ring_emit_ce_meta(ring);
5621 
5622 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5623 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5624 		/* set load_global_config & load_global_uconfig */
5625 		dw2 |= 0x8001;
5626 		/* set load_cs_sh_regs */
5627 		dw2 |= 0x01000000;
5628 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5629 		dw2 |= 0x10002;
5630 
5631 		/* set load_ce_ram if preamble presented */
5632 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5633 			dw2 |= 0x10000000;
5634 	} else {
5635 		/* still load_ce_ram if this is the first time preamble presented
5636 		 * although there is no context switch happens.
5637 		 */
5638 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5639 			dw2 |= 0x10000000;
5640 	}
5641 
5642 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5643 	amdgpu_ring_write(ring, dw2);
5644 	amdgpu_ring_write(ring, 0);
5645 }
5646 
5647 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5648 {
5649 	unsigned ret;
5650 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5651 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5652 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5653 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5654 	ret = ring->wptr & ring->buf_mask;
5655 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5656 	return ret;
5657 }
5658 
5659 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5660 {
5661 	unsigned cur;
5662 	BUG_ON(offset > ring->buf_mask);
5663 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5664 
5665 	cur = (ring->wptr & ring->buf_mask) - 1;
5666 	if (likely(cur > offset))
5667 		ring->ring[offset] = cur - offset;
5668 	else
5669 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5670 }
5671 
5672 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5673 				    uint32_t reg_val_offs)
5674 {
5675 	struct amdgpu_device *adev = ring->adev;
5676 
5677 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5678 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5679 				(5 << 8) |	/* dst: memory */
5680 				(1 << 20));	/* write confirm */
5681 	amdgpu_ring_write(ring, reg);
5682 	amdgpu_ring_write(ring, 0);
5683 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5684 				reg_val_offs * 4));
5685 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5686 				reg_val_offs * 4));
5687 }
5688 
5689 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5690 				    uint32_t val)
5691 {
5692 	uint32_t cmd = 0;
5693 
5694 	switch (ring->funcs->type) {
5695 	case AMDGPU_RING_TYPE_GFX:
5696 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5697 		break;
5698 	case AMDGPU_RING_TYPE_KIQ:
5699 		cmd = (1 << 16); /* no inc addr */
5700 		break;
5701 	default:
5702 		cmd = WR_CONFIRM;
5703 		break;
5704 	}
5705 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5706 	amdgpu_ring_write(ring, cmd);
5707 	amdgpu_ring_write(ring, reg);
5708 	amdgpu_ring_write(ring, 0);
5709 	amdgpu_ring_write(ring, val);
5710 }
5711 
/*
 * Emit a wait on register @reg using @val and @mask.
 *
 * Thin wrapper: forwards to gfx_v9_0_wait_reg_mem() with fixed values for
 * the remaining arguments.
 * NOTE(review): the meaning of the constant arguments (the leading zeros and
 * the trailing 0x20) is defined by gfx_v9_0_wait_reg_mem(), which is outside
 * this view — confirm there before changing them.
 */
static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
5717 
5718 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5719 						  uint32_t reg0, uint32_t reg1,
5720 						  uint32_t ref, uint32_t mask)
5721 {
5722 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5723 	struct amdgpu_device *adev = ring->adev;
5724 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5725 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5726 
5727 	if (fw_version_ok)
5728 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5729 				      ref, mask, 0x20);
5730 	else
5731 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5732 							   ref, mask);
5733 }
5734 
5735 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5736 {
5737 	struct amdgpu_device *adev = ring->adev;
5738 	uint32_t value = 0;
5739 
5740 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5741 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5742 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5743 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5744 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5745 }
5746 
5747 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5748 						 enum amdgpu_interrupt_state state)
5749 {
5750 	switch (state) {
5751 	case AMDGPU_IRQ_STATE_DISABLE:
5752 	case AMDGPU_IRQ_STATE_ENABLE:
5753 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5754 			       TIME_STAMP_INT_ENABLE,
5755 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5756 		break;
5757 	default:
5758 		break;
5759 	}
5760 }
5761 
5762 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5763 						     int me, int pipe,
5764 						     enum amdgpu_interrupt_state state)
5765 {
5766 	u32 mec_int_cntl, mec_int_cntl_reg;
5767 
5768 	/*
5769 	 * amdgpu controls only the first MEC. That's why this function only
5770 	 * handles the setting of interrupts for this specific MEC. All other
5771 	 * pipes' interrupts are set by amdkfd.
5772 	 */
5773 
5774 	if (me == 1) {
5775 		switch (pipe) {
5776 		case 0:
5777 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5778 			break;
5779 		case 1:
5780 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5781 			break;
5782 		case 2:
5783 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5784 			break;
5785 		case 3:
5786 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5787 			break;
5788 		default:
5789 			DRM_DEBUG("invalid pipe %d\n", pipe);
5790 			return;
5791 		}
5792 	} else {
5793 		DRM_DEBUG("invalid me %d\n", me);
5794 		return;
5795 	}
5796 
5797 	switch (state) {
5798 	case AMDGPU_IRQ_STATE_DISABLE:
5799 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5800 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5801 					     TIME_STAMP_INT_ENABLE, 0);
5802 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5803 		break;
5804 	case AMDGPU_IRQ_STATE_ENABLE:
5805 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5806 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5807 					     TIME_STAMP_INT_ENABLE, 1);
5808 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5809 		break;
5810 	default:
5811 		break;
5812 	}
5813 }
5814 
5815 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5816 					     struct amdgpu_irq_src *source,
5817 					     unsigned type,
5818 					     enum amdgpu_interrupt_state state)
5819 {
5820 	switch (state) {
5821 	case AMDGPU_IRQ_STATE_DISABLE:
5822 	case AMDGPU_IRQ_STATE_ENABLE:
5823 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5824 			       PRIV_REG_INT_ENABLE,
5825 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5826 		break;
5827 	default:
5828 		break;
5829 	}
5830 
5831 	return 0;
5832 }
5833 
5834 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5835 					      struct amdgpu_irq_src *source,
5836 					      unsigned type,
5837 					      enum amdgpu_interrupt_state state)
5838 {
5839 	switch (state) {
5840 	case AMDGPU_IRQ_STATE_DISABLE:
5841 	case AMDGPU_IRQ_STATE_ENABLE:
5842 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5843 			       PRIV_INSTR_INT_ENABLE,
5844 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5845 		break;
5846 	default:
5847 		break;
5848 	}
5849 
5850 	return 0;
5851 }
5852 
/*
 * Set CP_ECC_ERROR_INT_ENABLE to 1 in the CP_ME<me>_PIPE<pipe>_INT_CNTL
 * register; @me and @pipe are token-pasted into the register name.
 * NOTE(review): WREG32_FIELD15 appears to rely on an `adev` variable being
 * in scope at the expansion site — confirm against its definition.
 */
#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

/*
 * Counterpart of ENABLE_ECC_ON_ME_PIPE: sets CP_ECC_ERROR_INT_ENABLE to 0 in
 * the CP_ME<me>_PIPE<pipe>_INT_CNTL register.
 */
#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)
5860 
5861 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5862 					      struct amdgpu_irq_src *source,
5863 					      unsigned type,
5864 					      enum amdgpu_interrupt_state state)
5865 {
5866 	switch (state) {
5867 	case AMDGPU_IRQ_STATE_DISABLE:
5868 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5869 				CP_ECC_ERROR_INT_ENABLE, 0);
5870 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5871 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5872 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5873 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5874 		break;
5875 
5876 	case AMDGPU_IRQ_STATE_ENABLE:
5877 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5878 				CP_ECC_ERROR_INT_ENABLE, 1);
5879 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5880 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5881 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5882 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5883 		break;
5884 	default:
5885 		break;
5886 	}
5887 
5888 	return 0;
5889 }
5890 
5891 
5892 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5893 					    struct amdgpu_irq_src *src,
5894 					    unsigned type,
5895 					    enum amdgpu_interrupt_state state)
5896 {
5897 	switch (type) {
5898 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5899 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5900 		break;
5901 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5902 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5903 		break;
5904 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5905 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5906 		break;
5907 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5908 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5909 		break;
5910 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5911 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5912 		break;
5913 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5914 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5915 		break;
5916 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5917 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5918 		break;
5919 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5920 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5921 		break;
5922 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5923 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5924 		break;
5925 	default:
5926 		break;
5927 	}
5928 	return 0;
5929 }
5930 
5931 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5932 			    struct amdgpu_irq_src *source,
5933 			    struct amdgpu_iv_entry *entry)
5934 {
5935 	int i;
5936 	u8 me_id, pipe_id, queue_id;
5937 	struct amdgpu_ring *ring;
5938 
5939 	DRM_DEBUG("IH: CP EOP\n");
5940 	me_id = (entry->ring_id & 0x0c) >> 2;
5941 	pipe_id = (entry->ring_id & 0x03) >> 0;
5942 	queue_id = (entry->ring_id & 0x70) >> 4;
5943 
5944 	switch (me_id) {
5945 	case 0:
5946 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5947 		break;
5948 	case 1:
5949 	case 2:
5950 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5951 			ring = &adev->gfx.compute_ring[i];
5952 			/* Per-queue interrupt is supported for MEC starting from VI.
5953 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5954 			  */
5955 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5956 				amdgpu_fence_process(ring);
5957 		}
5958 		break;
5959 	}
5960 	return 0;
5961 }
5962 
5963 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5964 			   struct amdgpu_iv_entry *entry)
5965 {
5966 	u8 me_id, pipe_id, queue_id;
5967 	struct amdgpu_ring *ring;
5968 	int i;
5969 
5970 	me_id = (entry->ring_id & 0x0c) >> 2;
5971 	pipe_id = (entry->ring_id & 0x03) >> 0;
5972 	queue_id = (entry->ring_id & 0x70) >> 4;
5973 
5974 	switch (me_id) {
5975 	case 0:
5976 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5977 		break;
5978 	case 1:
5979 	case 2:
5980 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5981 			ring = &adev->gfx.compute_ring[i];
5982 			if (ring->me == me_id && ring->pipe == pipe_id &&
5983 			    ring->queue == queue_id)
5984 				drm_sched_fault(&ring->sched);
5985 		}
5986 		break;
5987 	}
5988 }
5989 
/*
 * Interrupt handler for a privileged register access fault: logs an error
 * and forwards @entry to gfx_v9_0_fault(), which notifies the scheduler of
 * the ring that raised it. @source is not used. Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
5998 
/*
 * Interrupt handler for a privileged instruction fault: logs an error and
 * forwards @entry to gfx_v9_0_fault(), which notifies the scheduler of the
 * ring that raised it. @source is not used. Always returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6007 
6008 
/*
 * GFX v9.0 RAS error-counter field table.
 *
 * Each entry names one memory/sub-block, identifies the EDC counter register
 * that holds its counts (SOC15_REG_ENTRY), and then provides two
 * SOC15_REG_FIELD slots. Entries that supply only one counter field fill the
 * second slot with "0, 0".
 * NOTE(review): judging by the field names, the first slot is presumably the
 * correctable (SEC/SED) count and the second the uncorrectable (DED) count —
 * confirm against the definition of struct soc15_ras_field_entry.
 */
static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
	},
	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
	},
	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
	  0, 0
	},
	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
	  0, 0
	},
	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
	},
	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
	  0, 0
	},
	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
	},
	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
	},
	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
	  0, 0
	},
	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
	  0, 0
	},
	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
	},
	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
	  0, 0
	},
	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
	},
	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
	  0, 0
	},
	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
	},
	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
	},
	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
	  0, 0
	},
	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
	},
	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
	  0, 0
	},
	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
	  0, 0
	},
	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
	},
	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
	},
	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
	},
	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
	},
	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
	},
	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
	  0, 0
	},
	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
	  0, 0
	},
	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
	  0, 0
	},
	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
	  0, 0
	},
	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
	  0, 0
	},
	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
	  0, 0
	},
	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
	},
	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
	},
	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
	  0, 0
	},
	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
	  0, 0
	},
	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
	  0, 0
	},
	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
	},
	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
	},
	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
	},
	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
	},
	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
	},
	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
	},
	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
	},
	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
	},
	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
	},
	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
	},
	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
	},
	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
	},
	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
	},
	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
	},
	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
	},
	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
	},
	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
	  0, 0
	}
};
6449 
6450 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6451 				     void *inject_if)
6452 {
6453 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6454 	int ret;
6455 	struct ta_ras_trigger_error_input block_info = { 0 };
6456 
6457 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6458 		return -EINVAL;
6459 
6460 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6461 		return -EINVAL;
6462 
6463 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6464 		return -EPERM;
6465 
6466 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6467 	      info->head.type)) {
6468 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6469 			ras_gfx_subblocks[info->head.sub_block_index].name,
6470 			info->head.type);
6471 		return -EPERM;
6472 	}
6473 
6474 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6475 	      info->head.type)) {
6476 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6477 			ras_gfx_subblocks[info->head.sub_block_index].name,
6478 			info->head.type);
6479 		return -EPERM;
6480 	}
6481 
6482 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6483 	block_info.sub_block_index =
6484 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6485 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6486 	block_info.address = info->address;
6487 	block_info.value = info->value;
6488 
6489 	mutex_lock(&adev->grbm_idx_mutex);
6490 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6491 	mutex_unlock(&adev->grbm_idx_mutex);
6492 
6493 	return ret;
6494 }
6495 
/*
 * Names of the VM L2 memories, indexed by the value written to
 * mmVM_L2_MEM_ECC_INDEX when the per-memory ECC counters are read.
 * The pointer array itself is const so the whole table is read-only.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6514 
/*
 * Names of the VM L2 walker memories, indexed by the value written to
 * mmVM_L2_WALKER_MEM_ECC_INDEX when the per-memory ECC counters are read.
 * The pointer array itself is const so the whole table is read-only.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6524 
/*
 * Names of the ATC L2 2M cache memories, indexed by the value written to
 * mmATC_L2_CACHE_2M_EDC_INDEX when the per-memory EDC counters are read.
 * The pointer array itself is const so the whole table is read-only.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6531 
/*
 * Names of the ATC L2 4K cache memories, indexed by the value written to
 * mmATC_L2_CACHE_4K_EDC_INDEX when the per-memory EDC counters are read.
 * The pointer array itself is const so the whole table is read-only.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6566 
6567 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6568 					 struct ras_err_data *err_data)
6569 {
6570 	uint32_t i, data;
6571 	uint32_t sec_count, ded_count;
6572 
6573 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6574 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6575 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6576 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6577 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6578 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6579 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6580 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6581 
6582 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6583 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6584 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6585 
6586 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6587 		if (sec_count) {
6588 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6589 				"SEC %d\n", i, vml2_mems[i], sec_count);
6590 			err_data->ce_count += sec_count;
6591 		}
6592 
6593 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6594 		if (ded_count) {
6595 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6596 				"DED %d\n", i, vml2_mems[i], ded_count);
6597 			err_data->ue_count += ded_count;
6598 		}
6599 	}
6600 
6601 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6602 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6603 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6604 
6605 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6606 						SEC_COUNT);
6607 		if (sec_count) {
6608 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6609 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6610 			err_data->ce_count += sec_count;
6611 		}
6612 
6613 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6614 						DED_COUNT);
6615 		if (ded_count) {
6616 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6618 			err_data->ue_count += ded_count;
6619 		}
6620 	}
6621 
6622 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6623 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6624 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6625 
6626 		sec_count = (data & 0x00006000L) >> 0xd;
6627 		if (sec_count) {
6628 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6629 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6630 				sec_count);
6631 			err_data->ce_count += sec_count;
6632 		}
6633 	}
6634 
6635 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6636 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6637 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6638 
6639 		sec_count = (data & 0x00006000L) >> 0xd;
6640 		if (sec_count) {
6641 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6642 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6643 				sec_count);
6644 			err_data->ce_count += sec_count;
6645 		}
6646 
6647 		ded_count = (data & 0x00018000L) >> 0xf;
6648 		if (ded_count) {
6649 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6650 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6651 				ded_count);
6652 			err_data->ue_count += ded_count;
6653 		}
6654 	}
6655 
6656 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6657 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6658 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6659 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6660 
6661 	return 0;
6662 }
6663 
6664 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6665 	const struct soc15_reg_entry *reg,
6666 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6667 	uint32_t *sec_count, uint32_t *ded_count)
6668 {
6669 	uint32_t i;
6670 	uint32_t sec_cnt, ded_cnt;
6671 
6672 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6673 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6674 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6675 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6676 			continue;
6677 
6678 		sec_cnt = (value &
6679 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6680 				gfx_v9_0_ras_fields[i].sec_count_shift;
6681 		if (sec_cnt) {
6682 			dev_info(adev->dev, "GFX SubBlock %s, "
6683 				"Instance[%d][%d], SEC %d\n",
6684 				gfx_v9_0_ras_fields[i].name,
6685 				se_id, inst_id,
6686 				sec_cnt);
6687 			*sec_count += sec_cnt;
6688 		}
6689 
6690 		ded_cnt = (value &
6691 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6692 				gfx_v9_0_ras_fields[i].ded_count_shift;
6693 		if (ded_cnt) {
6694 			dev_info(adev->dev, "GFX SubBlock %s, "
6695 				"Instance[%d][%d], DED %d\n",
6696 				gfx_v9_0_ras_fields[i].name,
6697 				se_id, inst_id,
6698 				ded_cnt);
6699 			*ded_count += ded_cnt;
6700 		}
6701 	}
6702 
6703 	return 0;
6704 }
6705 
6706 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6707 {
6708 	int i, j, k;
6709 
6710 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6711 		return;
6712 
6713 	/* read back registers to clear the counters */
6714 	mutex_lock(&adev->grbm_idx_mutex);
6715 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6716 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6717 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6718 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6719 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6720 			}
6721 		}
6722 	}
6723 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6724 	mutex_unlock(&adev->grbm_idx_mutex);
6725 
6726 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6727 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6728 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6729 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6730 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6731 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6732 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6733 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6734 
6735 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6736 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6737 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6738 	}
6739 
6740 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6741 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6742 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6743 	}
6744 
6745 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6746 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6747 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6748 	}
6749 
6750 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6751 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6752 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6753 	}
6754 
6755 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6756 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6757 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6758 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6759 }
6760 
6761 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6762 					  void *ras_error_status)
6763 {
6764 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6765 	uint32_t sec_count = 0, ded_count = 0;
6766 	uint32_t i, j, k;
6767 	uint32_t reg_value;
6768 
6769 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6770 		return -EINVAL;
6771 
6772 	err_data->ue_count = 0;
6773 	err_data->ce_count = 0;
6774 
6775 	mutex_lock(&adev->grbm_idx_mutex);
6776 
6777 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6778 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6779 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6780 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6781 				reg_value =
6782 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6783 				if (reg_value)
6784 					gfx_v9_0_ras_error_count(adev,
6785 						&gfx_v9_0_edc_counter_regs[i],
6786 						j, k, reg_value,
6787 						&sec_count, &ded_count);
6788 			}
6789 		}
6790 	}
6791 
6792 	err_data->ce_count += sec_count;
6793 	err_data->ue_count += ded_count;
6794 
6795 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6796 	mutex_unlock(&adev->grbm_idx_mutex);
6797 
6798 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6799 
6800 	return 0;
6801 }
6802 
6803 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6804 {
6805 	const unsigned int cp_coher_cntl =
6806 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6807 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6808 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6809 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6810 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6811 
6812 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6813 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6814 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6815 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6816 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6817 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6818 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6819 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6820 }
6821 
6822 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6823 					uint32_t pipe, bool enable)
6824 {
6825 	struct amdgpu_device *adev = ring->adev;
6826 	uint32_t val;
6827 	uint32_t wcl_cs_reg;
6828 
6829 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6830 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6831 
6832 	switch (pipe) {
6833 	case 0:
6834 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6835 		break;
6836 	case 1:
6837 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6838 		break;
6839 	case 2:
6840 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6841 		break;
6842 	case 3:
6843 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6844 		break;
6845 	default:
6846 		DRM_DEBUG("invalid pipe %d\n", pipe);
6847 		return;
6848 	}
6849 
6850 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6851 
6852 }
6853 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6854 {
6855 	struct amdgpu_device *adev = ring->adev;
6856 	uint32_t val;
6857 	int i;
6858 
6859 
6860 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6861 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6862 	 * around 25% of gpu resources.
6863 	 */
6864 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6865 	amdgpu_ring_emit_wreg(ring,
6866 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6867 			      val);
6868 
6869 	/* Restrict waves for normal/low priority compute queues as well
6870 	 * to get best QoS for high priority compute jobs.
6871 	 *
6872 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6873 	 */
6874 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6875 		if (i != ring->pipe)
6876 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6877 
6878 	}
6879 }
6880 
6881 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6882 	.name = "gfx_v9_0",
6883 	.early_init = gfx_v9_0_early_init,
6884 	.late_init = gfx_v9_0_late_init,
6885 	.sw_init = gfx_v9_0_sw_init,
6886 	.sw_fini = gfx_v9_0_sw_fini,
6887 	.hw_init = gfx_v9_0_hw_init,
6888 	.hw_fini = gfx_v9_0_hw_fini,
6889 	.suspend = gfx_v9_0_suspend,
6890 	.resume = gfx_v9_0_resume,
6891 	.is_idle = gfx_v9_0_is_idle,
6892 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6893 	.soft_reset = gfx_v9_0_soft_reset,
6894 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6895 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6896 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6897 };
6898 
6899 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6900 	.type = AMDGPU_RING_TYPE_GFX,
6901 	.align_mask = 0xff,
6902 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6903 	.support_64bit_ptrs = true,
6904 	.vmhub = AMDGPU_GFXHUB_0,
6905 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6906 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6907 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6908 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6909 		5 +  /* COND_EXEC */
6910 		7 +  /* PIPELINE_SYNC */
6911 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6912 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6913 		2 + /* VM_FLUSH */
6914 		8 +  /* FENCE for VM_FLUSH */
6915 		20 + /* GDS switch */
6916 		4 + /* double SWITCH_BUFFER,
6917 		       the first COND_EXEC jump to the place just
6918 			   prior to this double SWITCH_BUFFER  */
6919 		5 + /* COND_EXEC */
6920 		7 +	 /*	HDP_flush */
6921 		4 +	 /*	VGT_flush */
6922 		14 + /*	CE_META */
6923 		31 + /*	DE_META */
6924 		3 + /* CNTX_CTRL */
6925 		5 + /* HDP_INVL */
6926 		8 + 8 + /* FENCE x2 */
6927 		2 + /* SWITCH_BUFFER */
6928 		7, /* gfx_v9_0_emit_mem_sync */
6929 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6930 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6931 	.emit_fence = gfx_v9_0_ring_emit_fence,
6932 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6933 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6934 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6935 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6936 	.test_ring = gfx_v9_0_ring_test_ring,
6937 	.test_ib = gfx_v9_0_ring_test_ib,
6938 	.insert_nop = amdgpu_ring_insert_nop,
6939 	.pad_ib = amdgpu_ring_generic_pad_ib,
6940 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6941 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6942 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6943 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6944 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6945 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6946 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6947 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6948 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6949 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6950 };
6951 
6952 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6953 	.type = AMDGPU_RING_TYPE_COMPUTE,
6954 	.align_mask = 0xff,
6955 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6956 	.support_64bit_ptrs = true,
6957 	.vmhub = AMDGPU_GFXHUB_0,
6958 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6959 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6960 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6961 	.emit_frame_size =
6962 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6963 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6964 		5 + /* hdp invalidate */
6965 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6966 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6967 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6968 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6969 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6970 		7 + /* gfx_v9_0_emit_mem_sync */
6971 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6972 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6973 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6974 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6975 	.emit_fence = gfx_v9_0_ring_emit_fence,
6976 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6977 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6978 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6979 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6980 	.test_ring = gfx_v9_0_ring_test_ring,
6981 	.test_ib = gfx_v9_0_ring_test_ib,
6982 	.insert_nop = amdgpu_ring_insert_nop,
6983 	.pad_ib = amdgpu_ring_generic_pad_ib,
6984 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6985 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6986 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6987 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6988 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6989 };
6990 
6991 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6992 	.type = AMDGPU_RING_TYPE_KIQ,
6993 	.align_mask = 0xff,
6994 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6995 	.support_64bit_ptrs = true,
6996 	.vmhub = AMDGPU_GFXHUB_0,
6997 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6998 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6999 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7000 	.emit_frame_size =
7001 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7002 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7003 		5 + /* hdp invalidate */
7004 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7005 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7006 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7007 		2 + /* gfx_v9_0_ring_emit_vm_flush */
7008 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7009 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7010 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7011 	.test_ring = gfx_v9_0_ring_test_ring,
7012 	.insert_nop = amdgpu_ring_insert_nop,
7013 	.pad_ib = amdgpu_ring_generic_pad_ib,
7014 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7015 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7016 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7017 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7018 };
7019 
7020 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7021 {
7022 	int i;
7023 
7024 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7025 
7026 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7027 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7028 
7029 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7030 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7031 }
7032 
7033 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7034 	.set = gfx_v9_0_set_eop_interrupt_state,
7035 	.process = gfx_v9_0_eop_irq,
7036 };
7037 
7038 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7039 	.set = gfx_v9_0_set_priv_reg_fault_state,
7040 	.process = gfx_v9_0_priv_reg_irq,
7041 };
7042 
7043 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7044 	.set = gfx_v9_0_set_priv_inst_fault_state,
7045 	.process = gfx_v9_0_priv_inst_irq,
7046 };
7047 
7048 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7049 	.set = gfx_v9_0_set_cp_ecc_error_state,
7050 	.process = amdgpu_gfx_cp_ecc_error_irq,
7051 };
7052 
7053 
7054 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7055 {
7056 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7057 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7058 
7059 	adev->gfx.priv_reg_irq.num_types = 1;
7060 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7061 
7062 	adev->gfx.priv_inst_irq.num_types = 1;
7063 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7064 
7065 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7066 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7067 }
7068 
7069 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7070 {
7071 	switch (adev->ip_versions[GC_HWIP][0]) {
7072 	case IP_VERSION(9, 0, 1):
7073 	case IP_VERSION(9, 2, 1):
7074 	case IP_VERSION(9, 4, 0):
7075 	case IP_VERSION(9, 2, 2):
7076 	case IP_VERSION(9, 1, 0):
7077 	case IP_VERSION(9, 4, 1):
7078 	case IP_VERSION(9, 3, 0):
7079 	case IP_VERSION(9, 4, 2):
7080 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7081 		break;
7082 	default:
7083 		break;
7084 	}
7085 }
7086 
7087 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7088 {
7089 	/* init asci gds info */
7090 	switch (adev->ip_versions[GC_HWIP][0]) {
7091 	case IP_VERSION(9, 0, 1):
7092 	case IP_VERSION(9, 2, 1):
7093 	case IP_VERSION(9, 4, 0):
7094 		adev->gds.gds_size = 0x10000;
7095 		break;
7096 	case IP_VERSION(9, 2, 2):
7097 	case IP_VERSION(9, 1, 0):
7098 	case IP_VERSION(9, 4, 1):
7099 		adev->gds.gds_size = 0x1000;
7100 		break;
7101 	case IP_VERSION(9, 4, 2):
7102 		/* aldebaran removed all the GDS internal memory,
7103 		 * only support GWS opcode in kernel, like barrier
7104 		 * semaphore.etc */
7105 		adev->gds.gds_size = 0;
7106 		break;
7107 	default:
7108 		adev->gds.gds_size = 0x10000;
7109 		break;
7110 	}
7111 
7112 	switch (adev->ip_versions[GC_HWIP][0]) {
7113 	case IP_VERSION(9, 0, 1):
7114 	case IP_VERSION(9, 4, 0):
7115 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7116 		break;
7117 	case IP_VERSION(9, 2, 1):
7118 		adev->gds.gds_compute_max_wave_id = 0x27f;
7119 		break;
7120 	case IP_VERSION(9, 2, 2):
7121 	case IP_VERSION(9, 1, 0):
7122 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7123 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7124 		else
7125 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7126 		break;
7127 	case IP_VERSION(9, 4, 1):
7128 		adev->gds.gds_compute_max_wave_id = 0xfff;
7129 		break;
7130 	case IP_VERSION(9, 4, 2):
7131 		/* deprecated for Aldebaran, no usage at all */
7132 		adev->gds.gds_compute_max_wave_id = 0;
7133 		break;
7134 	default:
7135 		/* this really depends on the chip */
7136 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7137 		break;
7138 	}
7139 
7140 	adev->gds.gws_size = 64;
7141 	adev->gds.oa_size = 16;
7142 }
7143 
7144 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7145 						 u32 bitmap)
7146 {
7147 	u32 data;
7148 
7149 	if (!bitmap)
7150 		return;
7151 
7152 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7153 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7154 
7155 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7156 }
7157 
7158 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7159 {
7160 	u32 data, mask;
7161 
7162 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7163 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7164 
7165 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7166 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7167 
7168 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7169 
7170 	return (~data) & mask;
7171 }
7172 
7173 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7174 				 struct amdgpu_cu_info *cu_info)
7175 {
7176 	int i, j, k, counter, active_cu_number = 0;
7177 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7178 	unsigned disable_masks[4 * 4];
7179 
7180 	if (!adev || !cu_info)
7181 		return -EINVAL;
7182 
7183 	/*
7184 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7185 	 */
7186 	if (adev->gfx.config.max_shader_engines *
7187 		adev->gfx.config.max_sh_per_se > 16)
7188 		return -EINVAL;
7189 
7190 	amdgpu_gfx_parse_disable_cu(disable_masks,
7191 				    adev->gfx.config.max_shader_engines,
7192 				    adev->gfx.config.max_sh_per_se);
7193 
7194 	mutex_lock(&adev->grbm_idx_mutex);
7195 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7196 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7197 			mask = 1;
7198 			ao_bitmap = 0;
7199 			counter = 0;
7200 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7201 			gfx_v9_0_set_user_cu_inactive_bitmap(
7202 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7203 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7204 
7205 			/*
7206 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7207 			 * 4x4 size array, and it's usually suitable for Vega
7208 			 * ASICs which has 4*2 SE/SH layout.
7209 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7210 			 * To mostly reduce the impact, we make it compatible
7211 			 * with current bitmap array as below:
7212 			 *    SE4,SH0 --> bitmap[0][1]
7213 			 *    SE5,SH0 --> bitmap[1][1]
7214 			 *    SE6,SH0 --> bitmap[2][1]
7215 			 *    SE7,SH0 --> bitmap[3][1]
7216 			 */
7217 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7218 
7219 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7220 				if (bitmap & mask) {
7221 					if (counter < adev->gfx.config.max_cu_per_sh)
7222 						ao_bitmap |= mask;
7223 					counter ++;
7224 				}
7225 				mask <<= 1;
7226 			}
7227 			active_cu_number += counter;
7228 			if (i < 2 && j < 2)
7229 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7230 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7231 		}
7232 	}
7233 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7234 	mutex_unlock(&adev->grbm_idx_mutex);
7235 
7236 	cu_info->number = active_cu_number;
7237 	cu_info->ao_cu_mask = ao_cu_mask;
7238 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7239 
7240 	return 0;
7241 }
7242 
7243 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7244 {
7245 	.type = AMD_IP_BLOCK_TYPE_GFX,
7246 	.major = 9,
7247 	.minor = 0,
7248 	.rev = 0,
7249 	.funcs = &gfx_v9_0_ip_funcs,
7250 };
7251