xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 5c816641)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 #define GFX9_RLCG_GC_WRITE_OLD			(0x8 << 28)
67 #define GFX9_RLCG_GC_WRITE			(0x0 << 28)
68 #define GFX9_RLCG_GC_READ			(0x1 << 28)
69 #define GFX9_RLCG_VFGATE_DISABLED		0x4000000
70 #define GFX9_RLCG_WRONG_OPERATION_TYPE		0x2000000
71 #define GFX9_RLCG_NOT_IN_RANGE			0x1000000
72 
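/*
 * Firmware images used by the GFX9 variants below.  MODULE_FIRMWARE() only
 * records the names in module metadata so modinfo/initramfs tooling can pick
 * them up; the actual loading happens from the microcode init path.
 */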
73 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
90 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
91 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
95 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/raven_me.bin");
97 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
98 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
100 
101 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
104 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
105 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
107 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
108 
109 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
111 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
112 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
113 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
116 
117 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
118 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
119 
120 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
121 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
123 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
124 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
129 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
130 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
132 
133 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
134 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
135 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
136 
137 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
138 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
139 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
140 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
141 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
142 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
143 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
144 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
145 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
146 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
147 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
148 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
149 
150 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
151 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
152 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
153 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
154 
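/*
 * GFX sub-blocks addressable through the RAS TA interface.  Multi-instance
 * blocks (CPC, SQC, TCC, EA, ...) are bracketed by *_INDEX_START/*_INDEX_END
 * markers so a whole range can be iterated by index.
 */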
155 enum ta_ras_gfx_subblock {
156 	/*CPC*/
157 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
158 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
159 	TA_RAS_BLOCK__GFX_CPC_UCODE,
160 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
161 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
162 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
163 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
164 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
165 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
166 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
167 	/* CPF*/
168 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
169 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
170 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
171 	TA_RAS_BLOCK__GFX_CPF_TAG,
172 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
173 	/* CPG*/
174 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
175 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
176 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
177 	TA_RAS_BLOCK__GFX_CPG_TAG,
178 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
179 	/* GDS*/
180 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
181 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
182 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
183 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
184 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
185 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
186 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
187 	/* SPI*/
188 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
189 	/* SQ*/
190 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
191 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
192 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
193 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
194 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
195 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
196 	/* SQC (3 ranges)*/
197 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
198 	/* SQC range 0*/
199 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
200 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
201 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
203 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
204 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
205 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
206 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
207 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
209 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
210 	/* SQC range 1*/
211 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
212 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
213 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
214 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
215 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
216 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
218 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
219 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
220 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
221 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
222 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
223 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
224 	/* SQC range 2*/
225 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
226 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
227 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
228 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
229 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
230 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
232 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
233 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
234 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
235 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
236 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
237 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
238 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
239 	/* TA*/
240 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
241 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
242 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
243 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
244 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
245 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
246 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
247 	/* TCA*/
248 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
249 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
250 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
251 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
252 	/* TCC (5 sub-ranges)*/
253 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
254 	/* TCC range 0*/
255 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
257 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
258 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
260 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
261 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
262 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
263 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
264 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
265 	/* TCC range 1*/
266 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
267 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
268 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
269 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
270 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
271 	/* TCC range 2*/
272 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
273 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
274 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
275 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
277 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
278 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
279 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
280 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
281 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
282 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
283 	/* TCC range 3*/
284 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
285 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
286 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
288 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
289 	/* TCC range 4*/
290 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
291 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
292 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
293 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
294 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
295 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
296 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
297 	/* TCI*/
298 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
299 	/* TCP*/
300 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
301 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
302 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
303 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
304 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
305 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
306 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
307 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
308 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
309 	/* TD*/
310 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
311 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
312 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
313 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
314 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
315 	/* EA (3 sub-ranges)*/
316 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
317 	/* EA range 0*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
319 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
320 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
322 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
323 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
324 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
325 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
326 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
327 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
328 	/* EA range 1*/
329 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
330 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
331 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
332 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
333 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
334 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
335 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
336 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
337 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
338 	/* EA range 2*/
339 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
340 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
341 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
342 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
343 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
344 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
345 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
346 	/* UTC VM L2 bank*/
347 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
348 	/* UTC VM walker*/
349 	TA_RAS_BLOCK__UTC_VML2_WALKER,
350 	/* UTC ATC L2 2MB cache*/
351 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
352 	/* UTC ATC L2 4KB cache*/
353 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
354 	TA_RAS_BLOCK__GFX_MAX
355 };
356 
357 struct ras_gfx_subblock {
358 	unsigned char *name;
359 	int ta_subblock;
360 	int hw_supported_error_type;
361 	int sw_supported_error_type;
362 };
363 
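/*
 * Table entry helper: maps an AMDGPU_RAS_BLOCK__* sub-block to its
 * TA_RAS_BLOCK__* id and packs the error-type capability flags
 * (a..d: hw-supported error types, e..h: sw-supported error types) into the
 * two bitmask fields of struct ras_gfx_subblock.
 */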
364 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
365 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
366 		#subblock,                                                     \
367 		TA_RAS_BLOCK__##subblock,                                      \
368 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
369 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
370 	}
371 
372 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
392 			     0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
398 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
400 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
401 			     0, 0),
402 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
403 			     0),
404 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
405 			     0, 0),
406 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
407 			     0),
408 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
409 			     0, 0),
410 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
411 			     0),
412 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
413 			     1),
414 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
415 			     0, 0, 0),
416 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
417 			     0),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
419 			     0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
421 			     0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 			     0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
425 			     0),
426 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
427 			     0, 0),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
429 			     0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
431 			     0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
433 			     0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
435 			     0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
437 			     0),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
439 			     0),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
441 			     0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
443 			     0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
445 			     0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
447 			     0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
457 			     1),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
459 			     1),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
461 			     1),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
463 			     0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
465 			     0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
481 			     0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
483 			     0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
485 			     0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
517 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
518 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
519 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
520 };
521 
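/*
 * Golden register settings.  Each entry is a (register, and_mask, or_value)
 * tuple consumed by soc15_program_register_sequence(); roughly
 * reg = (reg & ~and_mask) | or_value.  The common GC 9.x tables are combined
 * with the per-ASIC tables that follow.
 */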
522 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
523 {
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
544 };
545 
546 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
547 {
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
566 };
567 
568 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
569 {
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
581 };
582 
583 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
584 {
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
609 };
610 
611 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
612 {
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
620 };
621 
622 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
623 {
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
643 };
644 
645 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
646 {
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
659 };
660 
661 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
662 {
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
666 };
667 
668 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
669 {
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
686 };
687 
688 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
689 {
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
703 };
704 
705 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
706 {
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
718 };
719 
720 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
721 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
722 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
723 };
724 
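/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL address/data register pairs,
 * expressed relative to instance 0 so they can be indexed in a loop.
 */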
725 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
726 {
727 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
731 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
732 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
733 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
734 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
735 };
736 
737 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
738 {
739 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
743 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
744 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
745 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
746 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
747 };
748 
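/*
 * RLC-assisted ("RLCG") register access: the value is staged in SCRATCH_REG0,
 * the dword offset plus command/error bits in SCRATCH_REG1, and RLC_SPARE_INT
 * notifies the RLC firmware.  The flag bits in SCRATCH_REG1 clear on
 * completion (polled below) and a read result is returned through
 * SCRATCH_REG0.  GRBM_GFX_CNTL and GRBM_GFX_INDEX are special-cased: they are
 * mirrored into SCRATCH_REG2/3 and still written directly through MMIO.
 */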
749 static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
750 {
751 	static void *scratch_reg0;
752 	static void *scratch_reg1;
753 	static void *scratch_reg2;
754 	static void *scratch_reg3;
755 	static void *spare_int;
756 	static uint32_t grbm_cntl;
757 	static uint32_t grbm_idx;
758 	uint32_t i = 0;
759 	uint32_t retries = 50000;
760 	u32 ret = 0;
761 	u32 tmp;
762 
763 	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
764 	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
765 	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
766 	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
767 	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
768 
769 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
770 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
771 
772 	if (offset == grbm_cntl || offset == grbm_idx) {
773 		if (offset  == grbm_cntl)
774 			writel(v, scratch_reg2);
775 		else if (offset == grbm_idx)
776 			writel(v, scratch_reg3);
777 
778 		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
779 	} else {
780 		/*
781 		 * SCRATCH_REG0 	= read/write value
782 		 * SCRATCH_REG1[30:28]	= command
783 		 * SCRATCH_REG1[19:0]	= address in dword
784 		 * SCRATCH_REG1[26:24]	= Error reporting
785 		 */
786 		writel(v, scratch_reg0);
787 		writel(offset | flag, scratch_reg1);
788 		writel(1, spare_int);
789 
790 		for (i = 0; i < retries; i++) {
791 			tmp = readl(scratch_reg1);
792 			if (!(tmp & flag))
793 				break;
794 
795 			udelay(10);
796 		}
797 
798 		if (i >= retries) {
799 			if (amdgpu_sriov_reg_indirect_gc(adev)) {
800 				if (tmp & GFX9_RLCG_VFGATE_DISABLED)
801 					pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset);
802 				else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)
803 					pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset);
804 				else if (tmp & GFX9_RLCG_NOT_IN_RANGE)
805 					pr_err("The register is not in range, program reg:0x%05x failed!\n", offset);
806 				else
807 					pr_err("Unknown error type, program reg:0x%05x failed!\n", offset);
808 			} else
809 				pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
810 		}
811 	}
812 
813 	ret = readl(scratch_reg0);
814 
815 	return ret;
816 }
817 
818 static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip,
819 				int write, u32 *rlcg_flag)
820 {
821 
822 	switch (hwip) {
823 	case GC_HWIP:
824 		if (amdgpu_sriov_reg_indirect_gc(adev)) {
825 			*rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ;
826 
827 			return true;
828 		/* only in the newer interface are AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */
829 		} else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
830 			*rlcg_flag = GFX9_RLCG_GC_WRITE_OLD;
831 			return true;
832 		}
833 
834 		break;
835 	default:
836 		return false;
837 	}
838 
839 	return false;
840 }
841 
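/*
 * SR-IOV register helpers: when the VF is not in runtime mode and the
 * register qualifies for RLC-assisted access, route the access through the
 * RLCG mailbox above; otherwise fall back to the regular (KIQ or direct)
 * register helpers.
 */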
842 static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip)
843 {
844 	u32 rlcg_flag;
845 
846 	if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag))
847 		return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag);
848 
849 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
850 		return RREG32_NO_KIQ(offset);
851 	else
852 		return RREG32(offset);
853 }
854 
855 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
856 			       u32 value, u32 acc_flags, u32 hwip)
857 {
858 	u32 rlcg_flag;
859 
860 	if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) {
861 		gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag);
862 		return;
863 	}
864 
865 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
866 		WREG32_NO_KIQ(offset, value);
867 	else
868 		WREG32(offset, value);
869 }
870 
871 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
872 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
873 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
874 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
875 
876 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
877 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
878 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
879 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
880 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
881 				struct amdgpu_cu_info *cu_info);
882 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
883 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
884 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
885 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
886 					  void *ras_error_status);
887 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
888 				     void *inject_if);
889 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
890 
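/*
 * KIQ PM4 packet builders.  These emit SET_RESOURCES, MAP_QUEUES,
 * UNMAP_QUEUES, QUERY_STATUS and INVALIDATE_TLBS packets on the kernel
 * interface queue; their dword counts must match the *_size fields of
 * gfx_v9_0_kiq_pm4_funcs below.
 */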
891 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
892 				uint64_t queue_mask)
893 {
894 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
895 	amdgpu_ring_write(kiq_ring,
896 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
897 		/* vmid_mask:0, queue_type:0 (KIQ) */
898 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
899 	amdgpu_ring_write(kiq_ring,
900 			lower_32_bits(queue_mask));	/* queue mask lo */
901 	amdgpu_ring_write(kiq_ring,
902 			upper_32_bits(queue_mask));	/* queue mask hi */
903 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
904 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
905 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
906 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
907 }
908 
909 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
910 				 struct amdgpu_ring *ring)
911 {
912 	struct amdgpu_device *adev = kiq_ring->adev;
913 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
914 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
915 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
916 
917 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
918 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
919 	amdgpu_ring_write(kiq_ring,
920 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
921 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
922 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
923 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
924 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
925 			 /*queue_type: normal compute queue */
926 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
927 			 /* alloc format: all_on_one_pipe */
928 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
929 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
930 			 /* num_queues: must be 1 */
931 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
932 	amdgpu_ring_write(kiq_ring,
933 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
934 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
935 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
936 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
937 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
938 }
939 
940 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
941 				   struct amdgpu_ring *ring,
942 				   enum amdgpu_unmap_queues_action action,
943 				   u64 gpu_addr, u64 seq)
944 {
945 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
946 
947 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
948 	amdgpu_ring_write(kiq_ring, /* action, queue_sel, eng_sel, num_queues */
949 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
950 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
951 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
952 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
953 	amdgpu_ring_write(kiq_ring,
954 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
955 
956 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
957 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
958 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
959 		amdgpu_ring_write(kiq_ring, seq);
960 	} else {
961 		amdgpu_ring_write(kiq_ring, 0);
962 		amdgpu_ring_write(kiq_ring, 0);
963 		amdgpu_ring_write(kiq_ring, 0);
964 	}
965 }
966 
967 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
968 				   struct amdgpu_ring *ring,
969 				   u64 addr,
970 				   u64 seq)
971 {
972 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
973 
974 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
975 	amdgpu_ring_write(kiq_ring,
976 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
977 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
978 			  PACKET3_QUERY_STATUS_COMMAND(2));
979 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
980 	amdgpu_ring_write(kiq_ring,
981 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
982 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
983 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
984 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
985 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
986 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
987 }
988 
989 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
990 				uint16_t pasid, uint32_t flush_type,
991 				bool all_hub)
992 {
993 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
994 	amdgpu_ring_write(kiq_ring,
995 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
996 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
997 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
998 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
999 }
1000 
1001 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1002 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1003 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1004 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1005 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1006 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1007 	.set_resources_size = 8,
1008 	.map_queues_size = 7,
1009 	.unmap_queues_size = 6,
1010 	.query_status_size = 7,
1011 	.invalidate_tlbs_size = 2,
1012 };
1013 
1014 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1015 {
1016 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
1017 }
1018 
1019 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1020 {
1021 	switch (adev->ip_versions[GC_HWIP][0]) {
1022 	case IP_VERSION(9, 0, 1):
1023 		soc15_program_register_sequence(adev,
1024 						golden_settings_gc_9_0,
1025 						ARRAY_SIZE(golden_settings_gc_9_0));
1026 		soc15_program_register_sequence(adev,
1027 						golden_settings_gc_9_0_vg10,
1028 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1029 		break;
1030 	case IP_VERSION(9, 2, 1):
1031 		soc15_program_register_sequence(adev,
1032 						golden_settings_gc_9_2_1,
1033 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1034 		soc15_program_register_sequence(adev,
1035 						golden_settings_gc_9_2_1_vg12,
1036 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1037 		break;
1038 	case IP_VERSION(9, 4, 0):
1039 		soc15_program_register_sequence(adev,
1040 						golden_settings_gc_9_0,
1041 						ARRAY_SIZE(golden_settings_gc_9_0));
1042 		soc15_program_register_sequence(adev,
1043 						golden_settings_gc_9_0_vg20,
1044 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1045 		break;
1046 	case IP_VERSION(9, 4, 1):
1047 		soc15_program_register_sequence(adev,
1048 						golden_settings_gc_9_4_1_arct,
1049 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1050 		break;
1051 	case IP_VERSION(9, 2, 2):
1052 	case IP_VERSION(9, 1, 0):
1053 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1054 						ARRAY_SIZE(golden_settings_gc_9_1));
1055 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1056 			soc15_program_register_sequence(adev,
1057 							golden_settings_gc_9_1_rv2,
1058 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1059 		else
1060 			soc15_program_register_sequence(adev,
1061 							golden_settings_gc_9_1_rv1,
1062 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1063 		break;
1064 	case IP_VERSION(9, 3, 0):
1065 		soc15_program_register_sequence(adev,
1066 						golden_settings_gc_9_1_rn,
1067 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1068 		return; /* Renoir doesn't need the common golden settings */
1069 	case IP_VERSION(9, 4, 2):
1070 		gfx_v9_4_2_init_golden_registers(adev,
1071 						 adev->smuio.funcs->get_die_id(adev));
1072 		break;
1073 	default:
1074 		break;
1075 	}
1076 
1077 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1078 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
1079 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1080 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1081 }
1082 
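/* GFX9 exposes eight scratch registers, starting at SCRATCH_REG0. */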
1083 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1084 {
1085 	adev->gfx.scratch.num_reg = 8;
1086 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1087 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1088 }
1089 
1090 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1091 				       bool wc, uint32_t reg, uint32_t val)
1092 {
1093 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1094 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1095 				WRITE_DATA_DST_SEL(0) |
1096 				(wc ? WR_CONFIRM : 0));
1097 	amdgpu_ring_write(ring, reg);
1098 	amdgpu_ring_write(ring, 0);
1099 	amdgpu_ring_write(ring, val);
1100 }
1101 
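/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space = 0) or memory
 * location (mem_space = 1) described by addr0/addr1 until (value & mask)
 * equals ref, re-checking every 'inv' poll intervals.
 */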
1102 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1103 				  int mem_space, int opt, uint32_t addr0,
1104 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1105 				  uint32_t inv)
1106 {
1107 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1108 	amdgpu_ring_write(ring,
1109 				 /* memory (1) or register (0) */
1110 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1111 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1112 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1113 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1114 
1115 	if (mem_space)
1116 		BUG_ON(addr0 & 0x3); /* Dword align */
1117 	amdgpu_ring_write(ring, addr0);
1118 	amdgpu_ring_write(ring, addr1);
1119 	amdgpu_ring_write(ring, ref);
1120 	amdgpu_ring_write(ring, mask);
1121 	amdgpu_ring_write(ring, inv); /* poll interval */
1122 }
1123 
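/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, and poll the
 * register to confirm the CP processed the packet.
 */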
1124 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1125 {
1126 	struct amdgpu_device *adev = ring->adev;
1127 	uint32_t scratch;
1128 	uint32_t tmp = 0;
1129 	unsigned i;
1130 	int r;
1131 
1132 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1133 	if (r)
1134 		return r;
1135 
1136 	WREG32(scratch, 0xCAFEDEAD);
1137 	r = amdgpu_ring_alloc(ring, 3);
1138 	if (r)
1139 		goto error_free_scratch;
1140 
1141 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1142 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1143 	amdgpu_ring_write(ring, 0xDEADBEEF);
1144 	amdgpu_ring_commit(ring);
1145 
1146 	for (i = 0; i < adev->usec_timeout; i++) {
1147 		tmp = RREG32(scratch);
1148 		if (tmp == 0xDEADBEEF)
1149 			break;
1150 		udelay(1);
1151 	}
1152 
1153 	if (i >= adev->usec_timeout)
1154 		r = -ETIMEDOUT;
1155 
1156 error_free_scratch:
1157 	amdgpu_gfx_scratch_free(adev, scratch);
1158 	return r;
1159 }
1160 
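/*
 * IB test: same idea as the ring test, but the magic value is written to a
 * writeback slot in GPU memory by a WRITE_DATA packet submitted through an
 * indirect buffer, and completion is observed via the IB's fence.
 */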
1161 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1162 {
1163 	struct amdgpu_device *adev = ring->adev;
1164 	struct amdgpu_ib ib;
1165 	struct dma_fence *f = NULL;
1166 
1167 	unsigned index;
1168 	uint64_t gpu_addr;
1169 	uint32_t tmp;
1170 	long r;
1171 
1172 	r = amdgpu_device_wb_get(adev, &index);
1173 	if (r)
1174 		return r;
1175 
1176 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1177 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1178 	memset(&ib, 0, sizeof(ib));
1179 	r = amdgpu_ib_get(adev, NULL, 16,
1180 					AMDGPU_IB_POOL_DIRECT, &ib);
1181 	if (r)
1182 		goto err1;
1183 
1184 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1185 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1186 	ib.ptr[2] = lower_32_bits(gpu_addr);
1187 	ib.ptr[3] = upper_32_bits(gpu_addr);
1188 	ib.ptr[4] = 0xDEADBEEF;
1189 	ib.length_dw = 5;
1190 
1191 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1192 	if (r)
1193 		goto err2;
1194 
1195 	r = dma_fence_wait_timeout(f, false, timeout);
1196 	if (r == 0) {
1197 		r = -ETIMEDOUT;
1198 		goto err2;
1199 	} else if (r < 0) {
1200 		goto err2;
1201 	}
1202 
1203 	tmp = adev->wb.wb[index];
1204 	if (tmp == 0xDEADBEEF)
1205 		r = 0;
1206 	else
1207 		r = -EINVAL;
1208 
1209 err2:
1210 	amdgpu_ib_free(adev, &ib, NULL);
1211 	dma_fence_put(f);
1212 err1:
1213 	amdgpu_device_wb_free(adev, index);
1214 	return r;
1215 }
1216 
1217 
1218 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1219 {
1220 	release_firmware(adev->gfx.pfp_fw);
1221 	adev->gfx.pfp_fw = NULL;
1222 	release_firmware(adev->gfx.me_fw);
1223 	adev->gfx.me_fw = NULL;
1224 	release_firmware(adev->gfx.ce_fw);
1225 	adev->gfx.ce_fw = NULL;
1226 	release_firmware(adev->gfx.rlc_fw);
1227 	adev->gfx.rlc_fw = NULL;
1228 	release_firmware(adev->gfx.mec_fw);
1229 	adev->gfx.mec_fw = NULL;
1230 	release_firmware(adev->gfx.mec2_fw);
1231 	adev->gfx.mec2_fw = NULL;
1232 
1233 	kfree(adev->gfx.rlc.register_list_format);
1234 }
1235 
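/*
 * Parse the v2.1 RLC firmware header and record the versions, sizes and
 * payload pointers of the save/restore lists (CNTL, GPM, SRM) for later
 * RLC programming.
 */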
1236 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1237 {
1238 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1239 
1240 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1241 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1242 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1243 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1244 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1245 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1246 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1247 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1248 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1249 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1250 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1251 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1252 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1253 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1254 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1255 }
1256 
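/*
 * Record whether the loaded ME/PFP and MEC firmware is recent enough to
 * support the CP register write-then-wait operation, and warn once when
 * the CP firmware is older than the recommended minimum.
 */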
1257 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1258 {
1259 	adev->gfx.me_fw_write_wait = false;
1260 	adev->gfx.mec_fw_write_wait = false;
1261 
1262 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1263 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1264 	    (adev->gfx.mec_feature_version < 46) ||
1265 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1266 	    (adev->gfx.pfp_feature_version < 46)))
1267 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1268 
1269 	switch (adev->ip_versions[GC_HWIP][0]) {
1270 	case IP_VERSION(9, 0, 1):
1271 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1272 		    (adev->gfx.me_feature_version >= 42) &&
1273 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1274 		    (adev->gfx.pfp_feature_version >= 42))
1275 			adev->gfx.me_fw_write_wait = true;
1276 
1277 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1278 		    (adev->gfx.mec_feature_version >= 42))
1279 			adev->gfx.mec_fw_write_wait = true;
1280 		break;
1281 	case IP_VERSION(9, 2, 1):
1282 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1283 		    (adev->gfx.me_feature_version >= 44) &&
1284 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1285 		    (adev->gfx.pfp_feature_version >= 44))
1286 			adev->gfx.me_fw_write_wait = true;
1287 
1288 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1289 		    (adev->gfx.mec_feature_version >= 44))
1290 			adev->gfx.mec_fw_write_wait = true;
1291 		break;
1292 	case IP_VERSION(9, 4, 0):
1293 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1294 		    (adev->gfx.me_feature_version >= 44) &&
1295 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1296 		    (adev->gfx.pfp_feature_version >= 44))
1297 			adev->gfx.me_fw_write_wait = true;
1298 
1299 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1300 		    (adev->gfx.mec_feature_version >= 44))
1301 			adev->gfx.mec_fw_write_wait = true;
1302 		break;
1303 	case IP_VERSION(9, 1, 0):
1304 	case IP_VERSION(9, 2, 2):
1305 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1306 		    (adev->gfx.me_feature_version >= 42) &&
1307 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1308 		    (adev->gfx.pfp_feature_version >= 42))
1309 			adev->gfx.me_fw_write_wait = true;
1310 
1311 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1312 		    (adev->gfx.mec_feature_version >= 42))
1313 			adev->gfx.mec_fw_write_wait = true;
1314 		break;
1315 	default:
1316 		adev->gfx.me_fw_write_wait = true;
1317 		adev->gfx.mec_fw_write_wait = true;
1318 		break;
1319 	}
1320 }
1321 
1322 struct amdgpu_gfxoff_quirk {
1323 	u16 chip_vendor;
1324 	u16 chip_device;
1325 	u16 subsys_vendor;
1326 	u16 subsys_device;
1327 	u8 revision;
1328 };
1329 
1330 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1331 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1332 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1333 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1334 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1335 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1336 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1337 	{ 0, 0, 0, 0, 0 },
1338 };
1339 
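/*
 * Walk the quirk list above and return true when the PCI vendor/device,
 * subsystem IDs and revision all match an entry known to have unstable
 * GFXOFF.
 */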
1340 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1341 {
1342 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1343 
1344 	while (p && p->chip_device != 0) {
1345 		if (pdev->vendor == p->chip_vendor &&
1346 		    pdev->device == p->chip_device &&
1347 		    pdev->subsystem_vendor == p->subsys_vendor &&
1348 		    pdev->subsystem_device == p->subsys_device &&
1349 		    pdev->revision == p->revision) {
1350 			return true;
1351 		}
1352 		++p;
1353 	}
1354 	return false;
1355 }
1356 
1357 static bool is_raven_kicker(struct amdgpu_device *adev)
1358 {
1359 	if (adev->pm.fw_version >= 0x41e2b)
1360 		return true;
1361 	else
1362 		return false;
1363 }
1364 
1365 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1366 {
1367 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1368 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1369 	    (adev->gfx.me_feature_version >= 52))
1370 		return true;
1371 	else
1372 		return false;
1373 }
1374 
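/*
 * Decide whether GFXOFF stays enabled: quirked boards disable it outright,
 * and original Raven additionally needs a recent RLC (v2.1, fw >= 531 or a
 * kicker part) to keep it.  If GFXOFF survives, the matching GFX
 * power-gating flags are enabled for the APUs.
 */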
1375 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1376 {
1377 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1378 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1379 
1380 	switch (adev->ip_versions[GC_HWIP][0]) {
1381 	case IP_VERSION(9, 0, 1):
1382 	case IP_VERSION(9, 2, 1):
1383 	case IP_VERSION(9, 4, 0):
1384 		break;
1385 	case IP_VERSION(9, 2, 2):
1386 	case IP_VERSION(9, 1, 0):
1387 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1388 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1389 		    ((!is_raven_kicker(adev) &&
1390 		      adev->gfx.rlc_fw_version < 531) ||
1391 		     (adev->gfx.rlc_feature_version < 1) ||
1392 		     !adev->gfx.rlc.is_rlc_v2_1))
1393 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394 
1395 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1396 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1397 				AMD_PG_SUPPORT_CP |
1398 				AMD_PG_SUPPORT_RLC_SMU_HS;
1399 		break;
1400 	case IP_VERSION(9, 3, 0):
1401 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1402 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1403 				AMD_PG_SUPPORT_CP |
1404 				AMD_PG_SUPPORT_RLC_SMU_HS;
1405 		break;
1406 	default:
1407 		break;
1408 	}
1409 }
1410 
1411 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1412 					  const char *chip_name)
1413 {
1414 	char fw_name[30];
1415 	int err;
1416 	struct amdgpu_firmware_info *info = NULL;
1417 	const struct common_firmware_header *header = NULL;
1418 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1419 
1420 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1421 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1422 	if (err)
1423 		goto out;
1424 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1425 	if (err)
1426 		goto out;
1427 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1428 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1429 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1430 
1431 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1432 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1433 	if (err)
1434 		goto out;
1435 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1436 	if (err)
1437 		goto out;
1438 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1439 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1440 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1441 
1442 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1443 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1444 	if (err)
1445 		goto out;
1446 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1447 	if (err)
1448 		goto out;
1449 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1450 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1451 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1452 
1453 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1454 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1455 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1456 		info->fw = adev->gfx.pfp_fw;
1457 		header = (const struct common_firmware_header *)info->fw->data;
1458 		adev->firmware.fw_size +=
1459 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1460 
1461 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1462 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1463 		info->fw = adev->gfx.me_fw;
1464 		header = (const struct common_firmware_header *)info->fw->data;
1465 		adev->firmware.fw_size +=
1466 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1467 
1468 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1469 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1470 		info->fw = adev->gfx.ce_fw;
1471 		header = (const struct common_firmware_header *)info->fw->data;
1472 		adev->firmware.fw_size +=
1473 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1474 	}
1475 
1476 out:
1477 	if (err) {
1478 		dev_err(adev->dev,
1479 			"gfx9: Failed to load firmware \"%s\"\n",
1480 			fw_name);
1481 		release_firmware(adev->gfx.pfp_fw);
1482 		adev->gfx.pfp_fw = NULL;
1483 		release_firmware(adev->gfx.me_fw);
1484 		adev->gfx.me_fw = NULL;
1485 		release_firmware(adev->gfx.ce_fw);
1486 		adev->gfx.ce_fw = NULL;
1487 	}
1488 	return err;
1489 }
1490 
1491 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1492 					  const char *chip_name)
1493 {
1494 	char fw_name[30];
1495 	int err;
1496 	struct amdgpu_firmware_info *info = NULL;
1497 	const struct common_firmware_header *header = NULL;
1498 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1499 	unsigned int *tmp = NULL;
1500 	unsigned int i = 0;
1501 	uint16_t version_major;
1502 	uint16_t version_minor;
1503 	uint32_t smu_version;
1504 
1505 	/*
1506 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1507 	 * instead of picasso_rlc.bin.
1508 	 * Detection:
1509 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1510 	 *          or revision >= 0xD8 && revision <= 0xDF
1511 	 * otherwise it is PCO FP5.
1512 	 */
1513 	if (!strcmp(chip_name, "picasso") &&
1514 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1515 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1516 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1517 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1518 		(smu_version >= 0x41e2b))
1519 		/*
1520 		 * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1521 		 */
1522 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1523 	else
1524 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1525 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1526 	if (err)
1527 		goto out;
1528 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1529 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1530 
1531 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1532 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1533 	if (version_major == 2 && version_minor == 1)
1534 		adev->gfx.rlc.is_rlc_v2_1 = true;
1535 
1536 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1537 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1538 	adev->gfx.rlc.save_and_restore_offset =
1539 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1540 	adev->gfx.rlc.clear_state_descriptor_offset =
1541 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1542 	adev->gfx.rlc.avail_scratch_ram_locations =
1543 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1544 	adev->gfx.rlc.reg_restore_list_size =
1545 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1546 	adev->gfx.rlc.reg_list_format_start =
1547 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1548 	adev->gfx.rlc.reg_list_format_separate_start =
1549 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1550 	adev->gfx.rlc.starting_offsets_start =
1551 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1552 	adev->gfx.rlc.reg_list_format_size_bytes =
1553 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1554 	adev->gfx.rlc.reg_list_size_bytes =
1555 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1556 	adev->gfx.rlc.register_list_format =
1557 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1558 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1559 	if (!adev->gfx.rlc.register_list_format) {
1560 		err = -ENOMEM;
1561 		goto out;
1562 	}
1563 
1564 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1565 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1566 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1567 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1568 
1569 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1570 
1571 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1572 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1573 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1574 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1575 
1576 	if (adev->gfx.rlc.is_rlc_v2_1)
1577 		gfx_v9_0_init_rlc_ext_microcode(adev);
1578 
1579 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1580 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1581 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1582 		info->fw = adev->gfx.rlc_fw;
1583 		header = (const struct common_firmware_header *)info->fw->data;
1584 		adev->firmware.fw_size +=
1585 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1586 
1587 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1588 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1589 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1590 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1591 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1592 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1593 			info->fw = adev->gfx.rlc_fw;
1594 			adev->firmware.fw_size +=
1595 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1596 
1597 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1598 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1599 			info->fw = adev->gfx.rlc_fw;
1600 			adev->firmware.fw_size +=
1601 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1602 
1603 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1604 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1605 			info->fw = adev->gfx.rlc_fw;
1606 			adev->firmware.fw_size +=
1607 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1608 		}
1609 	}
1610 
1611 out:
1612 	if (err) {
1613 		dev_err(adev->dev,
1614 			"gfx9: Failed to load firmware \"%s\"\n",
1615 			fw_name);
1616 		release_firmware(adev->gfx.rlc_fw);
1617 		adev->gfx.rlc_fw = NULL;
1618 	}
1619 	return err;
1620 }
1621 
1622 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1623 {
1624 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1625 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1626 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1627 		return false;
1628 
1629 	return true;
1630 }
1631 
1632 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1633 					  const char *chip_name)
1634 {
1635 	char fw_name[30];
1636 	int err;
1637 	struct amdgpu_firmware_info *info = NULL;
1638 	const struct common_firmware_header *header = NULL;
1639 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1640 
1641 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1642 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1643 	if (err)
1644 		goto out;
1645 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1646 	if (err)
1647 		goto out;
1648 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1649 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1650 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1651 
1652 
1653 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1654 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1655 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1656 		if (!err) {
1657 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1658 			if (err)
1659 				goto out;
1660 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1661 				adev->gfx.mec2_fw->data;
1662 			adev->gfx.mec2_fw_version =
1663 				le32_to_cpu(cp_hdr->header.ucode_version);
1664 			adev->gfx.mec2_feature_version =
1665 				le32_to_cpu(cp_hdr->ucode_feature_version);
1666 		} else {
1667 			err = 0;
1668 			adev->gfx.mec2_fw = NULL;
1669 		}
1670 	} else {
1671 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1672 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1673 	}
1674 
1675 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1676 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1677 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1678 		info->fw = adev->gfx.mec_fw;
1679 		header = (const struct common_firmware_header *)info->fw->data;
1680 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1681 		adev->firmware.fw_size +=
1682 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1683 
1684 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1685 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1686 		info->fw = adev->gfx.mec_fw;
1687 		adev->firmware.fw_size +=
1688 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1689 
1690 		if (adev->gfx.mec2_fw) {
1691 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1692 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1693 			info->fw = adev->gfx.mec2_fw;
1694 			header = (const struct common_firmware_header *)info->fw->data;
1695 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1696 			adev->firmware.fw_size +=
1697 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1698 
1699 			/* TODO: Determine if MEC2 JT FW loading can be removed
1700 			 * for all GFX v9 ASICs and above */
1701 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1702 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1703 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1704 				info->fw = adev->gfx.mec2_fw;
1705 				adev->firmware.fw_size +=
1706 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1707 					PAGE_SIZE);
1708 			}
1709 		}
1710 	}
1711 
1712 out:
1713 	gfx_v9_0_check_if_need_gfxoff(adev);
1714 	gfx_v9_0_check_fw_write_wait(adev);
1715 	if (err) {
1716 		dev_err(adev->dev,
1717 			"gfx9: Failed to load firmware \"%s\"\n",
1718 			fw_name);
1719 		release_firmware(adev->gfx.mec_fw);
1720 		adev->gfx.mec_fw = NULL;
1721 		release_firmware(adev->gfx.mec2_fw);
1722 		adev->gfx.mec2_fw = NULL;
1723 	}
1724 	return err;
1725 }
1726 
1727 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1728 {
1729 	const char *chip_name;
1730 	int r;
1731 
1732 	DRM_DEBUG("\n");
1733 
1734 	switch (adev->ip_versions[GC_HWIP][0]) {
1735 	case IP_VERSION(9, 0, 1):
1736 		chip_name = "vega10";
1737 		break;
1738 	case IP_VERSION(9, 2, 1):
1739 		chip_name = "vega12";
1740 		break;
1741 	case IP_VERSION(9, 4, 0):
1742 		chip_name = "vega20";
1743 		break;
1744 	case IP_VERSION(9, 2, 2):
1745 	case IP_VERSION(9, 1, 0):
1746 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1747 			chip_name = "raven2";
1748 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1749 			chip_name = "picasso";
1750 		else
1751 			chip_name = "raven";
1752 		break;
1753 	case IP_VERSION(9, 4, 1):
1754 		chip_name = "arcturus";
1755 		break;
1756 	case IP_VERSION(9, 3, 0):
1757 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1758 			chip_name = "renoir";
1759 		else
1760 			chip_name = "green_sardine";
1761 		break;
1762 	case IP_VERSION(9, 4, 2):
1763 		chip_name = "aldebaran";
1764 		break;
1765 	default:
1766 		BUG();
1767 	}
1768 
1769 	/* No CPG in Arcturus */
1770 	if (adev->gfx.num_gfx_rings) {
1771 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1772 		if (r)
1773 			return r;
1774 	}
1775 
1776 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1777 	if (r)
1778 		return r;
1779 
1780 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1781 	if (r)
1782 		return r;
1783 
1784 	return r;
1785 }
1786 
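/*
 * Clear-state buffer size in dwords: PREAMBLE begin (2) + CONTEXT_CONTROL
 * (3) + a SET_CONTEXT_REG header pair plus payload for each extent +
 * PREAMBLE end (2) + CLEAR_STATE (2).
 */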
1787 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1788 {
1789 	u32 count = 0;
1790 	const struct cs_section_def *sect = NULL;
1791 	const struct cs_extent_def *ext = NULL;
1792 
1793 	/* begin clear state */
1794 	count += 2;
1795 	/* context control state */
1796 	count += 3;
1797 
1798 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1799 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1800 			if (sect->id == SECT_CONTEXT)
1801 				count += 2 + ext->reg_count;
1802 			else
1803 				return 0;
1804 		}
1805 	}
1806 
1807 	/* end clear state */
1808 	count += 2;
1809 	/* clear state */
1810 	count += 2;
1811 
1812 	return count;
1813 }
1814 
1815 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1816 				    volatile u32 *buffer)
1817 {
1818 	u32 count = 0, i;
1819 	const struct cs_section_def *sect = NULL;
1820 	const struct cs_extent_def *ext = NULL;
1821 
1822 	if (adev->gfx.rlc.cs_data == NULL)
1823 		return;
1824 	if (buffer == NULL)
1825 		return;
1826 
1827 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1828 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1829 
1830 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1831 	buffer[count++] = cpu_to_le32(0x80000000);
1832 	buffer[count++] = cpu_to_le32(0x80000000);
1833 
1834 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1835 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1836 			if (sect->id == SECT_CONTEXT) {
1837 				buffer[count++] =
1838 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1839 				buffer[count++] = cpu_to_le32(ext->reg_index -
1840 						PACKET3_SET_CONTEXT_REG_START);
1841 				for (i = 0; i < ext->reg_count; i++)
1842 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1843 			} else {
1844 				return;
1845 			}
1846 		}
1847 	}
1848 
1849 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1850 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1851 
1852 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1853 	buffer[count++] = cpu_to_le32(0);
1854 }
1855 
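/*
 * Build the per-SE/SH always-on CU bitmaps used for power gating: the
 * first always_on_cu_num enabled CUs stay active, and the bitmap of the
 * first pg_always_on_cu_num of those is also written to
 * mmRLC_PG_ALWAYS_ON_CU_MASK.
 */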
1856 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1857 {
1858 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1859 	uint32_t pg_always_on_cu_num = 2;
1860 	uint32_t always_on_cu_num;
1861 	uint32_t i, j, k;
1862 	uint32_t mask, cu_bitmap, counter;
1863 
1864 	if (adev->flags & AMD_IS_APU)
1865 		always_on_cu_num = 4;
1866 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1867 		always_on_cu_num = 8;
1868 	else
1869 		always_on_cu_num = 12;
1870 
1871 	mutex_lock(&adev->grbm_idx_mutex);
1872 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1873 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1874 			mask = 1;
1875 			cu_bitmap = 0;
1876 			counter = 0;
1877 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1878 
1879 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1880 				if (cu_info->bitmap[i][j] & mask) {
1881 					if (counter == pg_always_on_cu_num)
1882 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1883 					if (counter < always_on_cu_num)
1884 						cu_bitmap |= mask;
1885 					else
1886 						break;
1887 					counter++;
1888 				}
1889 				mask <<= 1;
1890 			}
1891 
1892 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1893 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1894 		}
1895 	}
1896 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1897 	mutex_unlock(&adev->grbm_idx_mutex);
1898 }
1899 
1900 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1901 {
1902 	uint32_t data;
1903 
1904 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1905 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1906 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1907 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1908 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1909 
1910 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1911 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1912 
1913 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1914 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1915 
1916 	mutex_lock(&adev->grbm_idx_mutex);
1917 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1918 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1919 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1920 
1921 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1922 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1923 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1924 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1925 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1926 
1927 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1928 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1929 	data &= 0x0000FFFF;
1930 	data |= 0x00C00000;
1931 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1932 
1933 	/*
1934 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1935 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1936 	 */
1937 
1938 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1939 	 * but used here for RLC_LB_CNTL configuration */
1940 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1941 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1942 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1943 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1944 	mutex_unlock(&adev->grbm_idx_mutex);
1945 
1946 	gfx_v9_0_init_always_on_cu_mask(adev);
1947 }
1948 
1949 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1950 {
1951 	uint32_t data;
1952 
1953 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1954 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1955 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1956 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1957 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1958 
1959 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1960 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1961 
1962 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1963 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1964 
1965 	mutex_lock(&adev->grbm_idx_mutex);
1966 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1967 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1968 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1969 
1970 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1971 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1972 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1973 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1974 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1975 
1976 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1977 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1978 	data &= 0x0000FFFF;
1979 	data |= 0x00C00000;
1980 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1981 
1982 	/*
1983 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1984 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1985 	 */
1986 
1987 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1988 	 * but used here for RLC_LB_CNTL configuration */
1989 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1990 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1991 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1992 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1993 	mutex_unlock(&adev->grbm_idx_mutex);
1994 
1995 	gfx_v9_0_init_always_on_cu_mask(adev);
1996 }
1997 
1998 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1999 {
2000 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
2001 }
2002 
2003 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
2004 {
2005 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
2006 		return 5;
2007 	else
2008 		return 4;
2009 }
2010 
2011 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
2012 {
2013 	const struct cs_section_def *cs_data;
2014 	int r;
2015 
2016 	adev->gfx.rlc.cs_data = gfx9_cs_data;
2017 
2018 	cs_data = adev->gfx.rlc.cs_data;
2019 
2020 	if (cs_data) {
2021 		/* init clear state block */
2022 		r = amdgpu_gfx_rlc_init_csb(adev);
2023 		if (r)
2024 			return r;
2025 	}
2026 
2027 	if (adev->flags & AMD_IS_APU) {
2028 		/* TODO: double check the cp_table_size for RV */
2029 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
2030 		r = amdgpu_gfx_rlc_init_cpt(adev);
2031 		if (r)
2032 			return r;
2033 	}
2034 
2035 	switch (adev->ip_versions[GC_HWIP][0]) {
2036 	case IP_VERSION(9, 2, 2):
2037 	case IP_VERSION(9, 1, 0):
2038 		gfx_v9_0_init_lbpw(adev);
2039 		break;
2040 	case IP_VERSION(9, 4, 0):
2041 		gfx_v9_4_init_lbpw(adev);
2042 		break;
2043 	default:
2044 		break;
2045 	}
2046 
2047 	/* init spm vmid with 0xf */
2048 	if (adev->gfx.rlc.funcs->update_spm_vmid)
2049 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
2050 
2051 	return 0;
2052 }
2053 
2054 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
2055 {
2056 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2057 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
2058 }
2059 
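/*
 * Allocate the MEC HPD EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot per
 * compute ring) and copy the MEC microcode into a GTT buffer object for
 * the CP to fetch from.
 */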
2060 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
2061 {
2062 	int r;
2063 	u32 *hpd;
2064 	const __le32 *fw_data;
2065 	unsigned fw_size;
2066 	u32 *fw;
2067 	size_t mec_hpd_size;
2068 
2069 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2070 
2071 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2072 
2073 	/* take ownership of the relevant compute queues */
2074 	amdgpu_gfx_compute_queue_acquire(adev);
2075 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2076 	if (mec_hpd_size) {
2077 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2078 					      AMDGPU_GEM_DOMAIN_VRAM,
2079 					      &adev->gfx.mec.hpd_eop_obj,
2080 					      &adev->gfx.mec.hpd_eop_gpu_addr,
2081 					      (void **)&hpd);
2082 		if (r) {
2083 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2084 			gfx_v9_0_mec_fini(adev);
2085 			return r;
2086 		}
2087 
2088 		memset(hpd, 0, mec_hpd_size);
2089 
2090 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2091 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2092 	}
2093 
2094 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2095 
2096 	fw_data = (const __le32 *)
2097 		(adev->gfx.mec_fw->data +
2098 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2099 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2100 
2101 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2102 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2103 				      &adev->gfx.mec.mec_fw_obj,
2104 				      &adev->gfx.mec.mec_fw_gpu_addr,
2105 				      (void **)&fw);
2106 	if (r) {
2107 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2108 		gfx_v9_0_mec_fini(adev);
2109 		return r;
2110 	}
2111 
2112 	memcpy(fw, fw_data, fw_size);
2113 
2114 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2115 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2116 
2117 	return 0;
2118 }
2119 
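/*
 * Wave debug helpers: program SQ_IND_INDEX to select a SIMD/wave (and
 * optionally thread) register, then read the value(s) back through
 * SQ_IND_DATA, using auto-increment for multi-register reads.
 */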
2120 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2121 {
2122 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2123 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2124 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2125 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2126 		(SQ_IND_INDEX__FORCE_READ_MASK));
2127 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2128 }
2129 
2130 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2131 			   uint32_t wave, uint32_t thread,
2132 			   uint32_t regno, uint32_t num, uint32_t *out)
2133 {
2134 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2135 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2136 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2137 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2138 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2139 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2140 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2141 	while (num--)
2142 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2143 }
2144 
2145 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2146 {
2147 	/* type 1 wave data */
2148 	dst[(*no_fields)++] = 1;
2149 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2150 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2151 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2152 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2153 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2154 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2155 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2156 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2157 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2158 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2159 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2160 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2161 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2162 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2163 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2164 }
2165 
2166 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2167 				     uint32_t wave, uint32_t start,
2168 				     uint32_t size, uint32_t *dst)
2169 {
2170 	wave_read_regs(
2171 		adev, simd, wave, 0,
2172 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2173 }
2174 
2175 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2176 				     uint32_t wave, uint32_t thread,
2177 				     uint32_t start, uint32_t size,
2178 				     uint32_t *dst)
2179 {
2180 	wave_read_regs(
2181 		adev, simd, wave, thread,
2182 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2183 }
2184 
2185 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2186 				  u32 me, u32 pipe, u32 q, u32 vm)
2187 {
2188 	soc15_grbm_select(adev, me, pipe, q, vm);
2189 }
2190 
2191 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2192 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2193 	.select_se_sh = &gfx_v9_0_select_se_sh,
2194 	.read_wave_data = &gfx_v9_0_read_wave_data,
2195 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2196 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2197 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2198 };
2199 
2200 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2201 	.ras_late_init = amdgpu_gfx_ras_late_init,
2202 	.ras_fini = amdgpu_gfx_ras_fini,
2203 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2204 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2205 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2206 };
2207 
2208 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2209 {
2210 	u32 gb_addr_config;
2211 	int err;
2212 
2213 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2214 
2215 	switch (adev->ip_versions[GC_HWIP][0]) {
2216 	case IP_VERSION(9, 0, 1):
2217 		adev->gfx.config.max_hw_contexts = 8;
2218 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2219 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2220 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2221 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2222 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2223 		break;
2224 	case IP_VERSION(9, 2, 1):
2225 		adev->gfx.config.max_hw_contexts = 8;
2226 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2227 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2228 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2229 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2230 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2231 		DRM_INFO("fix gfx.config for vega12\n");
2232 		break;
2233 	case IP_VERSION(9, 4, 0):
2234 		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2235 		adev->gfx.config.max_hw_contexts = 8;
2236 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2237 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2238 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2239 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2240 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2241 		gb_addr_config &= ~0xf3e777ff;
2242 		gb_addr_config |= 0x22014042;
2243 		/* check vbios table if gpu info is not available */
2244 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2245 		if (err)
2246 			return err;
2247 		break;
2248 	case IP_VERSION(9, 2, 2):
2249 	case IP_VERSION(9, 1, 0):
2250 		adev->gfx.config.max_hw_contexts = 8;
2251 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2252 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2253 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2254 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2255 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2256 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2257 		else
2258 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2259 		break;
2260 	case IP_VERSION(9, 4, 1):
2261 		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2262 		adev->gfx.config.max_hw_contexts = 8;
2263 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2264 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2265 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2266 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2267 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2268 		gb_addr_config &= ~0xf3e777ff;
2269 		gb_addr_config |= 0x22014042;
2270 		break;
2271 	case IP_VERSION(9, 3, 0):
2272 		adev->gfx.config.max_hw_contexts = 8;
2273 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2274 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2275 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2276 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2277 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2278 		gb_addr_config &= ~0xf3e777ff;
2279 		gb_addr_config |= 0x22010042;
2280 		break;
2281 	case IP_VERSION(9, 4, 2):
2282 		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2283 		adev->gfx.config.max_hw_contexts = 8;
2284 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2285 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2286 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2287 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2288 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2289 		gb_addr_config &= ~0xf3e777ff;
2290 		gb_addr_config |= 0x22014042;
2291 		/* check vbios table if gpu info is not available */
2292 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2293 		if (err)
2294 			return err;
2295 		break;
2296 	default:
2297 		BUG();
2298 		break;
2299 	}
2300 
2301 	adev->gfx.config.gb_addr_config = gb_addr_config;
2302 
2303 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2304 			REG_GET_FIELD(
2305 					adev->gfx.config.gb_addr_config,
2306 					GB_ADDR_CONFIG,
2307 					NUM_PIPES);
2308 
2309 	adev->gfx.config.max_tile_pipes =
2310 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2311 
2312 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2313 			REG_GET_FIELD(
2314 					adev->gfx.config.gb_addr_config,
2315 					GB_ADDR_CONFIG,
2316 					NUM_BANKS);
2317 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2318 			REG_GET_FIELD(
2319 					adev->gfx.config.gb_addr_config,
2320 					GB_ADDR_CONFIG,
2321 					MAX_COMPRESSED_FRAGS);
2322 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2323 			REG_GET_FIELD(
2324 					adev->gfx.config.gb_addr_config,
2325 					GB_ADDR_CONFIG,
2326 					NUM_RB_PER_SE);
2327 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2328 			REG_GET_FIELD(
2329 					adev->gfx.config.gb_addr_config,
2330 					GB_ADDR_CONFIG,
2331 					NUM_SHADER_ENGINES);
2332 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2333 			REG_GET_FIELD(
2334 					adev->gfx.config.gb_addr_config,
2335 					GB_ADDR_CONFIG,
2336 					PIPE_INTERLEAVE_SIZE));
2337 
2338 	return 0;
2339 }
2340 
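/*
 * Set up one compute ring: MEC/pipe/queue addressing (mec0 is me1), a
 * doorbell slot derived from mec_ring0 + ring_id, an EOP slot inside the
 * shared HPD buffer, and the matching MEC pipe EOP interrupt source.
 */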
2341 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2342 				      int mec, int pipe, int queue)
2343 {
2344 	unsigned irq_type;
2345 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2346 	unsigned int hw_prio;
2347 
2350 	/* mec0 is me1 */
2351 	ring->me = mec + 1;
2352 	ring->pipe = pipe;
2353 	ring->queue = queue;
2354 
2355 	ring->ring_obj = NULL;
2356 	ring->use_doorbell = true;
2357 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2358 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2359 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2360 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2361 
2362 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2363 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2364 		+ ring->pipe;
2365 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2366 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2367 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2368 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2369 				hw_prio, NULL);
2370 }
2371 
2372 static int gfx_v9_0_sw_init(void *handle)
2373 {
2374 	int i, j, k, r, ring_id;
2375 	struct amdgpu_ring *ring;
2376 	struct amdgpu_kiq *kiq;
2377 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2378 
2379 	switch (adev->ip_versions[GC_HWIP][0]) {
2380 	case IP_VERSION(9, 0, 1):
2381 	case IP_VERSION(9, 2, 1):
2382 	case IP_VERSION(9, 4, 0):
2383 	case IP_VERSION(9, 2, 2):
2384 	case IP_VERSION(9, 1, 0):
2385 	case IP_VERSION(9, 4, 1):
2386 	case IP_VERSION(9, 3, 0):
2387 	case IP_VERSION(9, 4, 2):
2388 		adev->gfx.mec.num_mec = 2;
2389 		break;
2390 	default:
2391 		adev->gfx.mec.num_mec = 1;
2392 		break;
2393 	}
2394 
2395 	adev->gfx.mec.num_pipe_per_mec = 4;
2396 	adev->gfx.mec.num_queue_per_pipe = 8;
2397 
2398 	/* EOP Event */
2399 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2400 	if (r)
2401 		return r;
2402 
2403 	/* Privileged reg */
2404 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2405 			      &adev->gfx.priv_reg_irq);
2406 	if (r)
2407 		return r;
2408 
2409 	/* Privileged inst */
2410 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2411 			      &adev->gfx.priv_inst_irq);
2412 	if (r)
2413 		return r;
2414 
2415 	/* ECC error */
2416 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2417 			      &adev->gfx.cp_ecc_error_irq);
2418 	if (r)
2419 		return r;
2420 
2421 	/* FUE error */
2422 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2423 			      &adev->gfx.cp_ecc_error_irq);
2424 	if (r)
2425 		return r;
2426 
2427 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2428 
2429 	gfx_v9_0_scratch_init(adev);
2430 
2431 	r = gfx_v9_0_init_microcode(adev);
2432 	if (r) {
2433 		DRM_ERROR("Failed to load gfx firmware!\n");
2434 		return r;
2435 	}
2436 
2437 	r = adev->gfx.rlc.funcs->init(adev);
2438 	if (r) {
2439 		DRM_ERROR("Failed to init rlc BOs!\n");
2440 		return r;
2441 	}
2442 
2443 	r = gfx_v9_0_mec_init(adev);
2444 	if (r) {
2445 		DRM_ERROR("Failed to init MEC BOs!\n");
2446 		return r;
2447 	}
2448 
2449 	/* set up the gfx ring */
2450 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2451 		ring = &adev->gfx.gfx_ring[i];
2452 		ring->ring_obj = NULL;
2453 		if (!i)
2454 			sprintf(ring->name, "gfx");
2455 		else
2456 			sprintf(ring->name, "gfx_%d", i);
2457 		ring->use_doorbell = true;
2458 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2459 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2460 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2461 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2462 		if (r)
2463 			return r;
2464 	}
2465 
2466 	/* set up the compute queues - allocate horizontally across pipes */
2467 	ring_id = 0;
2468 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2469 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2470 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2471 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2472 					continue;
2473 
2474 				r = gfx_v9_0_compute_ring_init(adev,
2475 							       ring_id,
2476 							       i, k, j);
2477 				if (r)
2478 					return r;
2479 
2480 				ring_id++;
2481 			}
2482 		}
2483 	}
2484 
2485 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2486 	if (r) {
2487 		DRM_ERROR("Failed to init KIQ BOs!\n");
2488 		return r;
2489 	}
2490 
2491 	kiq = &adev->gfx.kiq;
2492 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2493 	if (r)
2494 		return r;
2495 
2496 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2497 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2498 	if (r)
2499 		return r;
2500 
2501 	adev->gfx.ce_ram_size = 0x8000;
2502 
2503 	r = gfx_v9_0_gpu_early_init(adev);
2504 	if (r)
2505 		return r;
2506 
2507 	return 0;
2508 }
2509 
2510 
2511 static int gfx_v9_0_sw_fini(void *handle)
2512 {
2513 	int i;
2514 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2515 
2516 	if (adev->gfx.ras_funcs &&
2517 	    adev->gfx.ras_funcs->ras_fini)
2518 		adev->gfx.ras_funcs->ras_fini(adev);
2519 
2520 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2521 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2522 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2523 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2524 
2525 	amdgpu_gfx_mqd_sw_fini(adev);
2526 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2527 	amdgpu_gfx_kiq_fini(adev);
2528 
2529 	gfx_v9_0_mec_fini(adev);
2530 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2531 				&adev->gfx.rlc.clear_state_gpu_addr,
2532 				(void **)&adev->gfx.rlc.cs_ptr);
2533 	if (adev->flags & AMD_IS_APU) {
2534 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2535 				&adev->gfx.rlc.cp_table_gpu_addr,
2536 				(void **)&adev->gfx.rlc.cp_table_ptr);
2537 	}
2538 	gfx_v9_0_free_microcode(adev);
2539 
2540 	return 0;
2541 }
2542 
2543 
2544 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2545 {
2546 	/* TODO */
2547 }
2548 
2549 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2550 			   u32 instance)
2551 {
2552 	u32 data;
2553 
2554 	if (instance == 0xffffffff)
2555 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2556 	else
2557 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2558 
2559 	if (se_num == 0xffffffff)
2560 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2561 	else
2562 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2563 
2564 	if (sh_num == 0xffffffff)
2565 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2566 	else
2567 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2568 
2569 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2570 }
2571 
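/*
 * Return the bitmap of active render backends for the currently selected
 * SE/SH: invert the disable bits from CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE and mask off everything beyond
 * max_backends_per_se / max_sh_per_se bits.
 */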
2572 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2573 {
2574 	u32 data, mask;
2575 
2576 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2577 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2578 
2579 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2580 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2581 
2582 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2583 					 adev->gfx.config.max_sh_per_se);
2584 
2585 	return (~data) & mask;
2586 }
2587 
2588 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2589 {
2590 	int i, j;
2591 	u32 data;
2592 	u32 active_rbs = 0;
2593 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2594 					adev->gfx.config.max_sh_per_se;
2595 
2596 	mutex_lock(&adev->grbm_idx_mutex);
2597 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2598 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2599 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2600 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2601 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2602 					       rb_bitmap_width_per_sh);
2603 		}
2604 	}
2605 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2606 	mutex_unlock(&adev->grbm_idx_mutex);
2607 
2608 	adev->gfx.config.backend_enable_mask = active_rbs;
2609 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2610 }
2611 
2612 #define DEFAULT_SH_MEM_BASES	(0x6000)
2613 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2614 {
2615 	int i;
2616 	uint32_t sh_mem_config;
2617 	uint32_t sh_mem_bases;
2618 
2619 	/*
2620 	 * Configure apertures:
2621 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2622 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2623 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2624 	 */
2625 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2626 
2627 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2628 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2629 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2630 
2631 	mutex_lock(&adev->srbm_mutex);
2632 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2633 		soc15_grbm_select(adev, 0, 0, 0, i);
2634 		/* CP and shaders */
2635 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2636 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2637 	}
2638 	soc15_grbm_select(adev, 0, 0, 0, 0);
2639 	mutex_unlock(&adev->srbm_mutex);
2640 
2641 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2642 	   access. These should be enabled by FW for target VMIDs. */
2643 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2644 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2645 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2646 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2647 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2648 	}
2649 }
2650 
2651 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2652 {
2653 	int vmid;
2654 
2655 	/*
2656 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2657 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2658 	 * the driver can enable them for graphics. VMID0 should maintain
2659 	 * access so that HWS firmware can save/restore entries.
2660 	 */
2661 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2662 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2663 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2664 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2665 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2666 	}
2667 }
2668 
2669 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2670 {
2671 	uint32_t tmp;
2672 
2673 	switch (adev->ip_versions[GC_HWIP][0]) {
2674 	case IP_VERSION(9, 4, 1):
2675 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2676 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2677 					DISABLE_BARRIER_WAITCNT, 1);
2678 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2679 		break;
2680 	default:
2681 		break;
2682 	}
2683 }
2684 
2685 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2686 {
2687 	u32 tmp;
2688 	int i;
2689 
2690 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2691 
2692 	gfx_v9_0_tiling_mode_table_init(adev);
2693 
2694 	gfx_v9_0_setup_rb(adev);
2695 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2696 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2697 
2698 	/* XXX SH_MEM regs */
2699 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2700 	mutex_lock(&adev->srbm_mutex);
2701 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2702 		soc15_grbm_select(adev, 0, 0, 0, i);
2703 		/* CP and shaders */
2704 		if (i == 0) {
2705 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2706 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2707 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2708 					    !!adev->gmc.noretry);
2709 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2710 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2711 		} else {
2712 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2713 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2714 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2715 					    !!adev->gmc.noretry);
2716 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2717 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2718 				(adev->gmc.private_aperture_start >> 48));
2719 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2720 				(adev->gmc.shared_aperture_start >> 48));
2721 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2722 		}
2723 	}
2724 	soc15_grbm_select(adev, 0, 0, 0, 0);
2725 
2726 	mutex_unlock(&adev->srbm_mutex);
2727 
2728 	gfx_v9_0_init_compute_vmid(adev);
2729 	gfx_v9_0_init_gds_vmid(adev);
2730 	gfx_v9_0_init_sq_config(adev);
2731 }
2732 
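/*
 * Wait for the RLC serdes to go idle: poll RLC_SERDES_CU_MASTER_BUSY for
 * every SE/SH and then the non-CU (SE/GC/TC0/TC1) master busy bits, up to
 * adev->usec_timeout microseconds each.
 */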
2733 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2734 {
2735 	u32 i, j, k;
2736 	u32 mask;
2737 
2738 	mutex_lock(&adev->grbm_idx_mutex);
2739 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2740 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2741 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2742 			for (k = 0; k < adev->usec_timeout; k++) {
2743 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2744 					break;
2745 				udelay(1);
2746 			}
2747 			if (k == adev->usec_timeout) {
2748 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2749 						      0xffffffff, 0xffffffff);
2750 				mutex_unlock(&adev->grbm_idx_mutex);
2751 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2752 					 i, j);
2753 				return;
2754 			}
2755 		}
2756 	}
2757 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2758 	mutex_unlock(&adev->grbm_idx_mutex);
2759 
2760 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2761 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2762 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2763 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2764 	for (k = 0; k < adev->usec_timeout; k++) {
2765 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2766 			break;
2767 		udelay(1);
2768 	}
2769 }
2770 
2771 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2772 					       bool enable)
2773 {
2774 	u32 tmp;
2775 
2776 	/* These interrupts should be enabled to drive DS clock */
2777 
2778 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2779 
2780 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2781 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2782 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2783 	if (adev->gfx.num_gfx_rings)
2784 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2785 
2786 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2787 }
2788 
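/*
 * Program the clear-state indirect buffer (CSIB): regenerate the CSB
 * contents and write its GPU address and size into the RLC CSIB registers.
 */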
2789 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2790 {
2791 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2792 	/* csib */
2793 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2794 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2795 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2796 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2797 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2798 			adev->gfx.rlc.clear_state_size);
2799 }
2800 
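/*
 * Walk the RLC register_list_format blob: record the start offset of each
 * indirect block in indirect_start_offsets[] and collect the unique
 * indirect register addresses (blocks are terminated by 0xFFFFFFFF markers)
 * in unique_indirect_regs[].
 */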
2801 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2802 				int indirect_offset,
2803 				int list_size,
2804 				int *unique_indirect_regs,
2805 				int unique_indirect_reg_count,
2806 				int *indirect_start_offsets,
2807 				int *indirect_start_offsets_count,
2808 				int max_start_offsets_count)
2809 {
2810 	int idx;
2811 
2812 	for (; indirect_offset < list_size; indirect_offset++) {
2813 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2814 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2815 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2816 
2817 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2818 			indirect_offset += 2;
2819 
2820 			/* look for the matching index */
2821 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2822 				if (unique_indirect_regs[idx] ==
2823 					register_list_format[indirect_offset] ||
2824 					!unique_indirect_regs[idx])
2825 					break;
2826 			}
2827 
2828 			BUG_ON(idx >= unique_indirect_reg_count);
2829 
2830 			if (!unique_indirect_regs[idx])
2831 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2832 
2833 			indirect_offset++;
2834 		}
2835 	}
2836 }
2837 
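/*
 * Build and upload the RLC save/restore lists: the register_restore table
 * is written to RLC ARAM, the direct and indirect register list format plus
 * the indirect start offsets go into RLC GPM scratch RAM, and the unique
 * indirect registers are programmed into the SRM index control registers.
 */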
2838 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2839 {
2840 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2841 	int unique_indirect_reg_count = 0;
2842 
2843 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2844 	int indirect_start_offsets_count = 0;
2845 
2846 	int list_size = 0;
2847 	int i = 0, j = 0;
2848 	u32 tmp = 0;
2849 
2850 	u32 *register_list_format =
2851 		kmemdup(adev->gfx.rlc.register_list_format,
2852 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2853 	if (!register_list_format)
2854 		return -ENOMEM;
2855 
2856 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2857 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2858 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2859 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2860 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2861 				    unique_indirect_regs,
2862 				    unique_indirect_reg_count,
2863 				    indirect_start_offsets,
2864 				    &indirect_start_offsets_count,
2865 				    ARRAY_SIZE(indirect_start_offsets));
2866 
2867 	/* enable auto inc in case it is disabled */
2868 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2869 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2870 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2871 
2872 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2873 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2874 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2875 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2876 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2877 			adev->gfx.rlc.register_restore[i]);
2878 
2879 	/* load indirect register */
2880 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2881 		adev->gfx.rlc.reg_list_format_start);
2882 
2883 	/* direct register portion */
2884 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2885 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2886 			register_list_format[i]);
2887 
2888 	/* indirect register portion */
2889 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2890 		if (register_list_format[i] == 0xFFFFFFFF) {
2891 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2892 			continue;
2893 		}
2894 
2895 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2896 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2897 
2898 		for (j = 0; j < unique_indirect_reg_count; j++) {
2899 			if (register_list_format[i] == unique_indirect_regs[j]) {
2900 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2901 				break;
2902 			}
2903 		}
2904 
2905 		BUG_ON(j >= unique_indirect_reg_count);
2906 
2907 		i++;
2908 	}
2909 
2910 	/* set save/restore list size */
2911 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2912 	list_size = list_size >> 1;
2913 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2914 		adev->gfx.rlc.reg_restore_list_size);
2915 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2916 
2917 	/* write the starting offsets to RLC scratch ram */
2918 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2919 		adev->gfx.rlc.starting_offsets_start);
2920 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2921 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2922 		       indirect_start_offsets[i]);
2923 
2924 	/* load unique indirect regs */
2925 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2926 		if (unique_indirect_regs[i] != 0) {
2927 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2928 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2929 			       unique_indirect_regs[i] & 0x3FFFF);
2930 
2931 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2932 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2933 			       unique_indirect_regs[i] >> 20);
2934 		}
2935 	}
2936 
2937 	kfree(register_list_format);
2938 	return 0;
2939 }
2940 
2941 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2942 {
2943 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2944 }
2945 
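/*
 * Hand control of CGPG over to GFXIP (or give it back) by toggling the
 * PWR_GFX_RLC_CGPG_EN bit in PWR_MISC_CNTL_STATUS; the register is only
 * rewritten when the value actually changes.
 */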
2946 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2947 					     bool enable)
2948 {
2949 	uint32_t data = 0;
2950 	uint32_t default_data = 0;
2951 
2952 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2953 	if (enable) {
2954 		/* enable GFXIP control over CGPG */
2955 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2956 		if (default_data != data)
2957 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2958 
2959 		/* update status */
2960 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2961 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2962 		if (default_data != data)
2963 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2964 	} else {
2965 		/* restore GFXIP control over GCPG */
2966 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2967 		if (default_data != data)
2968 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2969 	}
2970 }
2971 
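/*
 * Static setup for GFX power gating: program the idle poll count, the RLC
 * power-gating delays and the GRBM register save idle threshold, then hand
 * CGPG control to GFXIP (except on GC 9.3.0).
 */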
2972 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2973 {
2974 	uint32_t data = 0;
2975 
2976 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2977 			      AMD_PG_SUPPORT_GFX_SMG |
2978 			      AMD_PG_SUPPORT_GFX_DMG)) {
2979 		/* init IDLE_POLL_COUNT = 60 */
2980 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2981 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2982 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2983 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2984 
2985 		/* init RLC PG Delay */
2986 		data = 0;
2987 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2988 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2989 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2990 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2991 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2992 
2993 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2994 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2995 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2996 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2997 
2998 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2999 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
3000 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
3001 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
3002 
3003 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
3004 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3005 
3006 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
3007 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3008 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
3009 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
3010 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
3011 	}
3012 }
3013 
3014 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3015 						bool enable)
3016 {
3017 	uint32_t data = 0;
3018 	uint32_t default_data = 0;
3019 
3020 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3021 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3022 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
3023 			     enable ? 1 : 0);
3024 	if (default_data != data)
3025 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3026 }
3027 
3028 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3029 						bool enable)
3030 {
3031 	uint32_t data = 0;
3032 	uint32_t default_data = 0;
3033 
3034 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3035 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3036 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
3037 			     enable ? 1 : 0);
3038 	if (default_data != data)
3039 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3040 }
3041 
3042 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
3043 					bool enable)
3044 {
3045 	uint32_t data = 0;
3046 	uint32_t default_data = 0;
3047 
3048 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3049 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3050 			     CP_PG_DISABLE,
3051 			     enable ? 0 : 1);
3052 	if (default_data != data)
3053 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3054 }
3055 
3056 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
3057 						bool enable)
3058 {
3059 	uint32_t data, default_data;
3060 
3061 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3062 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3063 			     GFX_POWER_GATING_ENABLE,
3064 			     enable ? 1 : 0);
3065 	if (default_data != data)
3066 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3067 }
3068 
3069 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3070 						bool enable)
3071 {
3072 	uint32_t data, default_data;
3073 
3074 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3075 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3076 			     GFX_PIPELINE_PG_ENABLE,
3077 			     enable ? 1 : 0);
3078 	if (default_data != data)
3079 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3080 
3081 	if (!enable)
3082 		/* read any GFX register to wake up GFX */
3083 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3084 }
3085 
3086 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3087 						       bool enable)
3088 {
3089 	uint32_t data, default_data;
3090 
3091 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3092 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3093 			     STATIC_PER_CU_PG_ENABLE,
3094 			     enable ? 1 : 0);
3095 	if (default_data != data)
3096 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3097 }
3098 
3099 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3100 						bool enable)
3101 {
3102 	uint32_t data, default_data;
3103 
3104 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3105 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3106 			     DYN_PER_CU_PG_ENABLE,
3107 			     enable ? 1 : 0);
3108 	if (default_data != data)
3109 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3110 }
3111 
3112 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3113 {
3114 	gfx_v9_0_init_csb(adev);
3115 
3116 	/*
3117 	 * The RLC save/restore list is only usable since RLC v2_1,
3118 	 * and it is needed by the gfxoff feature.
3119 	 */
3120 	if (adev->gfx.rlc.is_rlc_v2_1) {
3121 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3122 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3123 			gfx_v9_1_init_rlc_save_restore_list(adev);
3124 		gfx_v9_0_enable_save_restore_machine(adev);
3125 	}
3126 
3127 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3128 			      AMD_PG_SUPPORT_GFX_SMG |
3129 			      AMD_PG_SUPPORT_GFX_DMG |
3130 			      AMD_PG_SUPPORT_CP |
3131 			      AMD_PG_SUPPORT_GDS |
3132 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3133 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3134 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3135 		gfx_v9_0_init_gfx_power_gating(adev);
3136 	}
3137 }
3138 
3139 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3140 {
3141 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3142 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3143 	gfx_v9_0_wait_for_rlc_serdes(adev);
3144 }
3145 
3146 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3147 {
3148 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3149 	udelay(50);
3150 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3151 	udelay(50);
3152 }
3153 
3154 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3155 {
3156 #ifdef AMDGPU_RLC_DEBUG_RETRY
3157 	u32 rlc_ucode_ver;
3158 #endif
3159 
3160 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3161 	udelay(50);
3162 
3163 	/* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
3164 	if (!(adev->flags & AMD_IS_APU)) {
3165 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3166 		udelay(50);
3167 	}
3168 
3169 #ifdef AMDGPU_RLC_DEBUG_RETRY
3170 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3171 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3172 	if (rlc_ucode_ver == 0x108) {
3173 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
3174 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3175 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3176 		 * default is 0x9C4 to create a 100us interval */
3177 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3178 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3179 		 * to disable the page fault retry interrupts, default is
3180 		 * 0x100 (256) */
3181 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3182 	}
3183 #endif
3184 }
3185 
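/*
 * Legacy (non-PSP) RLC firmware load: stream the ucode words through
 * RLC_GPM_UCODE_ADDR/DATA and finish by writing the firmware version to
 * RLC_GPM_UCODE_ADDR.
 */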
3186 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3187 {
3188 	const struct rlc_firmware_header_v2_0 *hdr;
3189 	const __le32 *fw_data;
3190 	unsigned i, fw_size;
3191 
3192 	if (!adev->gfx.rlc_fw)
3193 		return -EINVAL;
3194 
3195 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3196 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3197 
3198 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3199 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3200 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3201 
3202 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3203 			RLCG_UCODE_LOADING_START_ADDRESS);
3204 	for (i = 0; i < fw_size; i++)
3205 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3206 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3207 
3208 	return 0;
3209 }
3210 
3211 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3212 {
3213 	int r;
3214 
3215 	if (amdgpu_sriov_vf(adev)) {
3216 		gfx_v9_0_init_csb(adev);
3217 		return 0;
3218 	}
3219 
3220 	adev->gfx.rlc.funcs->stop(adev);
3221 
3222 	/* disable CG */
3223 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3224 
3225 	gfx_v9_0_init_pg(adev);
3226 
3227 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3228 		/* legacy rlc firmware loading */
3229 		r = gfx_v9_0_rlc_load_microcode(adev);
3230 		if (r)
3231 			return r;
3232 	}
3233 
3234 	switch (adev->ip_versions[GC_HWIP][0]) {
3235 	case IP_VERSION(9, 2, 2):
3236 	case IP_VERSION(9, 1, 0):
3237 		if (amdgpu_lbpw == 0)
3238 			gfx_v9_0_enable_lbpw(adev, false);
3239 		else
3240 			gfx_v9_0_enable_lbpw(adev, true);
3241 		break;
3242 	case IP_VERSION(9, 4, 0):
3243 		if (amdgpu_lbpw > 0)
3244 			gfx_v9_0_enable_lbpw(adev, true);
3245 		else
3246 			gfx_v9_0_enable_lbpw(adev, false);
3247 		break;
3248 	default:
3249 		break;
3250 	}
3251 
3252 	adev->gfx.rlc.funcs->start(adev);
3253 
3254 	return 0;
3255 }
3256 
3257 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3258 {
3259 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3260 
3261 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3262 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3263 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3264 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3265 	udelay(50);
3266 }
3267 
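/*
 * Legacy (non-PSP) CP gfx firmware load: halt the gfx CP, then stream the
 * PFP, CE and ME ucode images through their respective UCODE/RAM data ports.
 */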
3268 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3269 {
3270 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3271 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3272 	const struct gfx_firmware_header_v1_0 *me_hdr;
3273 	const __le32 *fw_data;
3274 	unsigned i, fw_size;
3275 
3276 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3277 		return -EINVAL;
3278 
3279 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3280 		adev->gfx.pfp_fw->data;
3281 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3282 		adev->gfx.ce_fw->data;
3283 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3284 		adev->gfx.me_fw->data;
3285 
3286 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3287 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3288 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3289 
3290 	gfx_v9_0_cp_gfx_enable(adev, false);
3291 
3292 	/* PFP */
3293 	fw_data = (const __le32 *)
3294 		(adev->gfx.pfp_fw->data +
3295 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3296 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3297 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3298 	for (i = 0; i < fw_size; i++)
3299 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3300 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3301 
3302 	/* CE */
3303 	fw_data = (const __le32 *)
3304 		(adev->gfx.ce_fw->data +
3305 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3306 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3307 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3308 	for (i = 0; i < fw_size; i++)
3309 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3310 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3311 
3312 	/* ME */
3313 	fw_data = (const __le32 *)
3314 		(adev->gfx.me_fw->data +
3315 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3316 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3317 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3318 	for (i = 0; i < fw_size; i++)
3319 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3320 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3321 
3322 	return 0;
3323 }
3324 
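/*
 * Initialize the gfx CP and emit the clear-state sequence on the gfx ring:
 * preamble begin/end, the SECT_CONTEXT extents from gfx9_cs_data, a
 * CLEAR_STATE packet and the CE partition bases.
 */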
3325 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3326 {
3327 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3328 	const struct cs_section_def *sect = NULL;
3329 	const struct cs_extent_def *ext = NULL;
3330 	int r, i, tmp;
3331 
3332 	/* init the CP */
3333 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3334 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3335 
3336 	gfx_v9_0_cp_gfx_enable(adev, true);
3337 
3338 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3339 	if (r) {
3340 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3341 		return r;
3342 	}
3343 
3344 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3345 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3346 
3347 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3348 	amdgpu_ring_write(ring, 0x80000000);
3349 	amdgpu_ring_write(ring, 0x80000000);
3350 
3351 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3352 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3353 			if (sect->id == SECT_CONTEXT) {
3354 				amdgpu_ring_write(ring,
3355 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3356 					       ext->reg_count));
3357 				amdgpu_ring_write(ring,
3358 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3359 				for (i = 0; i < ext->reg_count; i++)
3360 					amdgpu_ring_write(ring, ext->extent[i]);
3361 			}
3362 		}
3363 	}
3364 
3365 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3366 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3367 
3368 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3369 	amdgpu_ring_write(ring, 0);
3370 
3371 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3372 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3373 	amdgpu_ring_write(ring, 0x8000);
3374 	amdgpu_ring_write(ring, 0x8000);
3375 
3376 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3377 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3378 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3379 	amdgpu_ring_write(ring, tmp);
3380 	amdgpu_ring_write(ring, 0);
3381 
3382 	amdgpu_ring_commit(ring);
3383 
3384 	return 0;
3385 }
3386 
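/*
 * Bring up the gfx ring buffer: program the CP_RB0 size, read/write
 * pointers, writeback addresses, base address and doorbell range, then run
 * gfx_v9_0_cp_gfx_start() and mark the ring as ready.
 */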
3387 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3388 {
3389 	struct amdgpu_ring *ring;
3390 	u32 tmp;
3391 	u32 rb_bufsz;
3392 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3393 
3394 	/* Set the write pointer delay */
3395 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3396 
3397 	/* set the RB to use vmid 0 */
3398 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3399 
3400 	/* Set ring buffer size */
3401 	ring = &adev->gfx.gfx_ring[0];
3402 	rb_bufsz = order_base_2(ring->ring_size / 8);
3403 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3404 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3405 #ifdef __BIG_ENDIAN
3406 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3407 #endif
3408 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3409 
3410 	/* Initialize the ring buffer's write pointers */
3411 	ring->wptr = 0;
3412 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3413 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3414 
3415 	/* set the wb address whether it's enabled or not */
3416 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3417 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3418 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3419 
3420 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3421 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3422 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3423 
3424 	mdelay(1);
3425 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3426 
3427 	rb_addr = ring->gpu_addr >> 8;
3428 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3429 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3430 
3431 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3432 	if (ring->use_doorbell) {
3433 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3434 				    DOORBELL_OFFSET, ring->doorbell_index);
3435 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3436 				    DOORBELL_EN, 1);
3437 	} else {
3438 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3439 	}
3440 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3441 
3442 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3443 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3444 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3445 
3446 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3447 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3448 
3449 
3450 	/* start the ring */
3451 	gfx_v9_0_cp_gfx_start(adev);
3452 	ring->sched.ready = true;
3453 
3454 	return 0;
3455 }
3456 
3457 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3458 {
3459 	if (enable) {
3460 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3461 	} else {
3462 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3463 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3464 		adev->gfx.kiq.ring.sched.ready = false;
3465 	}
3466 	udelay(50);
3467 }
3468 
3469 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3470 {
3471 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3472 	const __le32 *fw_data;
3473 	unsigned i;
3474 	u32 tmp;
3475 
3476 	if (!adev->gfx.mec_fw)
3477 		return -EINVAL;
3478 
3479 	gfx_v9_0_cp_compute_enable(adev, false);
3480 
3481 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3482 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3483 
3484 	fw_data = (const __le32 *)
3485 		(adev->gfx.mec_fw->data +
3486 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3487 	tmp = 0;
3488 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3489 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3490 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3491 
3492 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3493 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3494 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3495 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3496 
3497 	/* MEC1 */
3498 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3499 			 mec_hdr->jt_offset);
3500 	for (i = 0; i < mec_hdr->jt_size; i++)
3501 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3502 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3503 
3504 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3505 			adev->gfx.mec_fw_version);
3506 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3507 
3508 	return 0;
3509 }
3510 
3511 /* KIQ functions */
3512 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3513 {
3514 	uint32_t tmp;
3515 	struct amdgpu_device *adev = ring->adev;
3516 
3517 	/* tell the RLC which queue is the KIQ */
3518 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3519 	tmp &= 0xffffff00;
3520 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3521 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3522 	tmp |= 0x80;
3523 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3524 }
3525 
3526 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3527 {
3528 	struct amdgpu_device *adev = ring->adev;
3529 
3530 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3531 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3532 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3533 			mqd->cp_hqd_queue_priority =
3534 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3535 		}
3536 	}
3537 }
3538 
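/*
 * Fill in the memory queue descriptor (MQD) for a compute/KIQ ring: EOP
 * buffer, doorbell control, MQD and HQD base addresses, PQ control, queue
 * priority and, for the KIQ only, the active bit.
 */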
3539 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3540 {
3541 	struct amdgpu_device *adev = ring->adev;
3542 	struct v9_mqd *mqd = ring->mqd_ptr;
3543 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3544 	uint32_t tmp;
3545 
3546 	mqd->header = 0xC0310800;
3547 	mqd->compute_pipelinestat_enable = 0x00000001;
3548 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3549 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3550 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3551 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3552 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3553 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3554 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3555 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3556 	mqd->compute_misc_reserved = 0x00000003;
3557 
3558 	mqd->dynamic_cu_mask_addr_lo =
3559 		lower_32_bits(ring->mqd_gpu_addr
3560 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3561 	mqd->dynamic_cu_mask_addr_hi =
3562 		upper_32_bits(ring->mqd_gpu_addr
3563 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3564 
3565 	eop_base_addr = ring->eop_gpu_addr >> 8;
3566 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3567 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3568 
3569 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3570 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3571 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3572 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3573 
3574 	mqd->cp_hqd_eop_control = tmp;
3575 
3576 	/* enable doorbell? */
3577 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3578 
3579 	if (ring->use_doorbell) {
3580 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3581 				    DOORBELL_OFFSET, ring->doorbell_index);
3582 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3583 				    DOORBELL_EN, 1);
3584 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3585 				    DOORBELL_SOURCE, 0);
3586 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3587 				    DOORBELL_HIT, 0);
3588 	} else {
3589 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3590 					 DOORBELL_EN, 0);
3591 	}
3592 
3593 	mqd->cp_hqd_pq_doorbell_control = tmp;
3594 
3595 	/* disable the queue if it's active */
3596 	ring->wptr = 0;
3597 	mqd->cp_hqd_dequeue_request = 0;
3598 	mqd->cp_hqd_pq_rptr = 0;
3599 	mqd->cp_hqd_pq_wptr_lo = 0;
3600 	mqd->cp_hqd_pq_wptr_hi = 0;
3601 
3602 	/* set the pointer to the MQD */
3603 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3604 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3605 
3606 	/* set MQD vmid to 0 */
3607 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3608 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3609 	mqd->cp_mqd_control = tmp;
3610 
3611 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3612 	hqd_gpu_addr = ring->gpu_addr >> 8;
3613 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3614 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3615 
3616 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3617 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3618 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3619 			    (order_base_2(ring->ring_size / 4) - 1));
3620 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3621 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3622 #ifdef __BIG_ENDIAN
3623 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3624 #endif
3625 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3626 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3627 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3628 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3629 	mqd->cp_hqd_pq_control = tmp;
3630 
3631 	/* set the wb address whether it's enabled or not */
3632 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3633 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3634 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3635 		upper_32_bits(wb_gpu_addr) & 0xffff;
3636 
3637 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3638 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3639 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3640 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3641 
3642 	tmp = 0;
3643 	/* enable the doorbell if requested */
3644 	if (ring->use_doorbell) {
3645 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3646 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3647 				DOORBELL_OFFSET, ring->doorbell_index);
3648 
3649 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3650 					 DOORBELL_EN, 1);
3651 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3652 					 DOORBELL_SOURCE, 0);
3653 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3654 					 DOORBELL_HIT, 0);
3655 	}
3656 
3657 	mqd->cp_hqd_pq_doorbell_control = tmp;
3658 
3659 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3660 	ring->wptr = 0;
3661 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3662 
3663 	/* set the vmid for the queue */
3664 	mqd->cp_hqd_vmid = 0;
3665 
3666 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3667 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3668 	mqd->cp_hqd_persistent_state = tmp;
3669 
3670 	/* set MIN_IB_AVAIL_SIZE */
3671 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3672 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3673 	mqd->cp_hqd_ib_control = tmp;
3674 
3675 	/* set static priority for a queue/ring */
3676 	gfx_v9_0_mqd_set_priority(ring, mqd);
3677 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3678 
3679 	/* the map_queues packet doesn't need to activate the queue,
3680 	 * so only the KIQ needs to set this field.
3681 	 */
3682 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3683 		mqd->cp_hqd_active = 1;
3684 
3685 	return 0;
3686 }
3687 
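/*
 * Push the KIQ MQD into the hardware queue descriptor registers: deactivate
 * the queue if it is running, program the HQD from the MQD fields, set up
 * the MEC doorbell range and finally activate the queue.
 */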
3688 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3689 {
3690 	struct amdgpu_device *adev = ring->adev;
3691 	struct v9_mqd *mqd = ring->mqd_ptr;
3692 	int j;
3693 
3694 	/* disable wptr polling */
3695 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3696 
3697 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3698 	       mqd->cp_hqd_eop_base_addr_lo);
3699 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3700 	       mqd->cp_hqd_eop_base_addr_hi);
3701 
3702 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3703 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3704 	       mqd->cp_hqd_eop_control);
3705 
3706 	/* enable doorbell? */
3707 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3708 	       mqd->cp_hqd_pq_doorbell_control);
3709 
3710 	/* disable the queue if it's active */
3711 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3712 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3713 		for (j = 0; j < adev->usec_timeout; j++) {
3714 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3715 				break;
3716 			udelay(1);
3717 		}
3718 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3719 		       mqd->cp_hqd_dequeue_request);
3720 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3721 		       mqd->cp_hqd_pq_rptr);
3722 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3723 		       mqd->cp_hqd_pq_wptr_lo);
3724 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3725 		       mqd->cp_hqd_pq_wptr_hi);
3726 	}
3727 
3728 	/* set the pointer to the MQD */
3729 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3730 	       mqd->cp_mqd_base_addr_lo);
3731 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3732 	       mqd->cp_mqd_base_addr_hi);
3733 
3734 	/* set MQD vmid to 0 */
3735 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3736 	       mqd->cp_mqd_control);
3737 
3738 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3739 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3740 	       mqd->cp_hqd_pq_base_lo);
3741 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3742 	       mqd->cp_hqd_pq_base_hi);
3743 
3744 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3745 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3746 	       mqd->cp_hqd_pq_control);
3747 
3748 	/* set the wb address whether it's enabled or not */
3749 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3750 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3751 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3752 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3753 
3754 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3755 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3756 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3757 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3758 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3759 
3760 	/* enable the doorbell if requested */
3761 	if (ring->use_doorbell) {
3762 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3763 					(adev->doorbell_index.kiq * 2) << 2);
3764 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3765 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3766 		 * around this issue. This change has to be kept in line with the
3767 		 * corresponding firmware update.
3768 		 */
3769 		if (check_if_enlarge_doorbell_range(adev))
3770 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3771 					(adev->doorbell.size - 4));
3772 		else
3773 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3774 					(adev->doorbell_index.userqueue_end * 2) << 2);
3775 	}
3776 
3777 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3778 	       mqd->cp_hqd_pq_doorbell_control);
3779 
3780 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3781 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3782 	       mqd->cp_hqd_pq_wptr_lo);
3783 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3784 	       mqd->cp_hqd_pq_wptr_hi);
3785 
3786 	/* set the vmid for the queue */
3787 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3788 
3789 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3790 	       mqd->cp_hqd_persistent_state);
3791 
3792 	/* activate the queue */
3793 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3794 	       mqd->cp_hqd_active);
3795 
3796 	if (ring->use_doorbell)
3797 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3798 
3799 	return 0;
3800 }
3801 
3802 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3803 {
3804 	struct amdgpu_device *adev = ring->adev;
3805 	int j;
3806 
3807 	/* disable the queue if it's active */
3808 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3809 
3810 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3811 
3812 		for (j = 0; j < adev->usec_timeout; j++) {
3813 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3814 				break;
3815 			udelay(1);
3816 		}
3817 
3818 		if (j == adev->usec_timeout) {
3819 			DRM_DEBUG("KIQ dequeue request failed.\n");
3820 
3821 			/* Manual disable if dequeue request times out */
3822 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3823 		}
3824 
3825 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3826 		      0);
3827 	}
3828 
3829 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3830 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3831 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3832 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3833 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3834 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3835 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3836 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3837 
3838 	return 0;
3839 }
3840 
3841 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3842 {
3843 	struct amdgpu_device *adev = ring->adev;
3844 	struct v9_mqd *mqd = ring->mqd_ptr;
3845 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3846 	struct v9_mqd *tmp_mqd;
3847 
3848 	gfx_v9_0_kiq_setting(ring);
3849 
3850 	/* The GPU could be in a bad state during probe and the driver may
3851 	 * trigger a reset right after loading the SMU; in that case the MQD
3852 	 * has not been initialized and the driver needs to re-init it.
3853 	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3854 	 */
3855 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3856 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3857 		/* for GPU_RESET case , reset MQD to a clean status */
3858 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3859 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3860 
3861 		/* reset ring buffer */
3862 		ring->wptr = 0;
3863 		amdgpu_ring_clear_ring(ring);
3864 
3865 		mutex_lock(&adev->srbm_mutex);
3866 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3867 		gfx_v9_0_kiq_init_register(ring);
3868 		soc15_grbm_select(adev, 0, 0, 0, 0);
3869 		mutex_unlock(&adev->srbm_mutex);
3870 	} else {
3871 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3872 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3873 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3874 		mutex_lock(&adev->srbm_mutex);
3875 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3876 		gfx_v9_0_mqd_init(ring);
3877 		gfx_v9_0_kiq_init_register(ring);
3878 		soc15_grbm_select(adev, 0, 0, 0, 0);
3879 		mutex_unlock(&adev->srbm_mutex);
3880 
3881 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3882 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3883 	}
3884 
3885 	return 0;
3886 }
3887 
3888 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3889 {
3890 	struct amdgpu_device *adev = ring->adev;
3891 	struct v9_mqd *mqd = ring->mqd_ptr;
3892 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3893 	struct v9_mqd *tmp_mqd;
3894 
3895 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3896 	 * mqd->cp_hqd_pq_control has not been initialized before.
3897 	 */
3898 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3899 
3900 	if (!tmp_mqd->cp_hqd_pq_control ||
3901 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3902 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3903 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3904 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3905 		mutex_lock(&adev->srbm_mutex);
3906 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3907 		gfx_v9_0_mqd_init(ring);
3908 		soc15_grbm_select(adev, 0, 0, 0, 0);
3909 		mutex_unlock(&adev->srbm_mutex);
3910 
3911 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3912 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3913 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3914 		/* reset MQD to a clean status */
3915 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3916 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3917 
3918 		/* reset ring buffer */
3919 		ring->wptr = 0;
3920 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3921 		amdgpu_ring_clear_ring(ring);
3922 	} else {
3923 		amdgpu_ring_clear_ring(ring);
3924 	}
3925 
3926 	return 0;
3927 }
3928 
3929 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3930 {
3931 	struct amdgpu_ring *ring;
3932 	int r;
3933 
3934 	ring = &adev->gfx.kiq.ring;
3935 
3936 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3937 	if (unlikely(r != 0))
3938 		return r;
3939 
3940 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3941 	if (unlikely(r != 0))
3942 		return r;
3943 
3944 	gfx_v9_0_kiq_init_queue(ring);
3945 	amdgpu_bo_kunmap(ring->mqd_obj);
3946 	ring->mqd_ptr = NULL;
3947 	amdgpu_bo_unreserve(ring->mqd_obj);
3948 	ring->sched.ready = true;
3949 	return 0;
3950 }
3951 
3952 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3953 {
3954 	struct amdgpu_ring *ring = NULL;
3955 	int r = 0, i;
3956 
3957 	gfx_v9_0_cp_compute_enable(adev, true);
3958 
3959 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3960 		ring = &adev->gfx.compute_ring[i];
3961 
3962 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3963 		if (unlikely(r != 0))
3964 			goto done;
3965 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3966 		if (!r) {
3967 			r = gfx_v9_0_kcq_init_queue(ring);
3968 			amdgpu_bo_kunmap(ring->mqd_obj);
3969 			ring->mqd_ptr = NULL;
3970 		}
3971 		amdgpu_bo_unreserve(ring->mqd_obj);
3972 		if (r)
3973 			goto done;
3974 	}
3975 
3976 	r = amdgpu_gfx_enable_kcq(adev);
3977 done:
3978 	return r;
3979 }
3980 
3981 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3982 {
3983 	int r, i;
3984 	struct amdgpu_ring *ring;
3985 
3986 	if (!(adev->flags & AMD_IS_APU))
3987 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3988 
3989 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3990 		if (adev->gfx.num_gfx_rings) {
3991 			/* legacy firmware loading */
3992 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3993 			if (r)
3994 				return r;
3995 		}
3996 
3997 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3998 		if (r)
3999 			return r;
4000 	}
4001 
4002 	r = gfx_v9_0_kiq_resume(adev);
4003 	if (r)
4004 		return r;
4005 
4006 	if (adev->gfx.num_gfx_rings) {
4007 		r = gfx_v9_0_cp_gfx_resume(adev);
4008 		if (r)
4009 			return r;
4010 	}
4011 
4012 	r = gfx_v9_0_kcq_resume(adev);
4013 	if (r)
4014 		return r;
4015 
4016 	if (adev->gfx.num_gfx_rings) {
4017 		ring = &adev->gfx.gfx_ring[0];
4018 		r = amdgpu_ring_test_helper(ring);
4019 		if (r)
4020 			return r;
4021 	}
4022 
4023 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4024 		ring = &adev->gfx.compute_ring[i];
4025 		amdgpu_ring_test_helper(ring);
4026 	}
4027 
4028 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
4029 
4030 	return 0;
4031 }
4032 
4033 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
4034 {
4035 	u32 tmp;
4036 
4037 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
4038 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
4039 		return;
4040 
4041 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
4042 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
4043 				adev->df.hash_status.hash_64k);
4044 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4045 				adev->df.hash_status.hash_2m);
4046 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4047 				adev->df.hash_status.hash_1g);
4048 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4049 }
4050 
4051 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4052 {
4053 	if (adev->gfx.num_gfx_rings)
4054 		gfx_v9_0_cp_gfx_enable(adev, enable);
4055 	gfx_v9_0_cp_compute_enable(adev, enable);
4056 }
4057 
4058 static int gfx_v9_0_hw_init(void *handle)
4059 {
4060 	int r;
4061 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4062 
4063 	if (!amdgpu_sriov_vf(adev))
4064 		gfx_v9_0_init_golden_registers(adev);
4065 
4066 	gfx_v9_0_constants_init(adev);
4067 
4068 	gfx_v9_0_init_tcp_config(adev);
4069 
4070 	r = adev->gfx.rlc.funcs->resume(adev);
4071 	if (r)
4072 		return r;
4073 
4074 	r = gfx_v9_0_cp_resume(adev);
4075 	if (r)
4076 		return r;
4077 
4078 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4079 		gfx_v9_4_2_set_power_brake_sequence(adev);
4080 
4081 	return r;
4082 }
4083 
4084 static int gfx_v9_0_hw_fini(void *handle)
4085 {
4086 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4087 
4088 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4089 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4090 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4091 
4092 	/* DF freeze and kcq disable will fail */
4093 	if (!amdgpu_ras_intr_triggered())
4094 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4095 		amdgpu_gfx_disable_kcq(adev);
4096 
4097 	if (amdgpu_sriov_vf(adev)) {
4098 		gfx_v9_0_cp_gfx_enable(adev, false);
4099 		/* must disable polling for SRIOV when hw is finished, otherwise
4100 		 * the CPC engine may keep fetching a WB address that is already
4101 		 * invalid after sw is finished and trigger a DMAR read error on
4102 		 * the hypervisor side.
4103 		 */
4104 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4105 		return 0;
4106 	}
4107 
4108 	/* Use the deinitialize sequence from CAIL when unbinding the device
4109 	 * from the driver, otherwise KIQ hangs when binding it back.
4110 	 */
4111 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4112 		mutex_lock(&adev->srbm_mutex);
4113 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4114 				adev->gfx.kiq.ring.pipe,
4115 				adev->gfx.kiq.ring.queue, 0);
4116 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4117 		soc15_grbm_select(adev, 0, 0, 0, 0);
4118 		mutex_unlock(&adev->srbm_mutex);
4119 	}
4120 
4121 	gfx_v9_0_cp_enable(adev, false);
4122 
4123 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4124 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4125 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4126 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4127 		return 0;
4128 	}
4129 
4130 	adev->gfx.rlc.funcs->stop(adev);
4131 	return 0;
4132 }
4133 
4134 static int gfx_v9_0_suspend(void *handle)
4135 {
4136 	return gfx_v9_0_hw_fini(handle);
4137 }
4138 
4139 static int gfx_v9_0_resume(void *handle)
4140 {
4141 	return gfx_v9_0_hw_init(handle);
4142 }
4143 
4144 static bool gfx_v9_0_is_idle(void *handle)
4145 {
4146 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4147 
4148 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4149 				GRBM_STATUS, GUI_ACTIVE))
4150 		return false;
4151 	else
4152 		return true;
4153 }
4154 
4155 static int gfx_v9_0_wait_for_idle(void *handle)
4156 {
4157 	unsigned i;
4158 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4159 
4160 	for (i = 0; i < adev->usec_timeout; i++) {
4161 		if (gfx_v9_0_is_idle(handle))
4162 			return 0;
4163 		udelay(1);
4164 	}
4165 	return -ETIMEDOUT;
4166 }
4167 
4168 static int gfx_v9_0_soft_reset(void *handle)
4169 {
4170 	u32 grbm_soft_reset = 0;
4171 	u32 tmp;
4172 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4173 
4174 	/* GRBM_STATUS */
4175 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4176 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4177 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4178 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4179 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4180 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4181 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4182 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4183 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4184 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4185 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4186 	}
4187 
4188 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4189 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4190 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4191 	}
4192 
4193 	/* GRBM_STATUS2 */
4194 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4195 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4196 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4197 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4198 
4199 
4200 	if (grbm_soft_reset) {
4201 		/* stop the rlc */
4202 		adev->gfx.rlc.funcs->stop(adev);
4203 
4204 		if (adev->gfx.num_gfx_rings)
4205 			/* Disable GFX parsing/prefetching */
4206 			gfx_v9_0_cp_gfx_enable(adev, false);
4207 
4208 		/* Disable MEC parsing/prefetching */
4209 		gfx_v9_0_cp_compute_enable(adev, false);
4210 
4211 		if (grbm_soft_reset) {
4212 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4213 			tmp |= grbm_soft_reset;
4214 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4215 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4216 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4217 
4218 			udelay(50);
4219 
4220 			tmp &= ~grbm_soft_reset;
4221 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4222 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4223 		}
4224 
4225 		/* Wait a little for things to settle down */
4226 		udelay(50);
4227 	}
4228 	return 0;
4229 }
4230 
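/*
 * Read the GPU clock counter through the KIQ: emit a COPY_DATA packet that
 * writes the clock count into a writeback slot and poll the fence, retrying
 * up to MAX_KIQ_REG_TRY times. Used where direct register access is not
 * available (e.g. under SR-IOV at runtime).
 */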
4231 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4232 {
4233 	signed long r, cnt = 0;
4234 	unsigned long flags;
4235 	uint32_t seq, reg_val_offs = 0;
4236 	uint64_t value = 0;
4237 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4238 	struct amdgpu_ring *ring = &kiq->ring;
4239 
4240 	BUG_ON(!ring->funcs->emit_rreg);
4241 
4242 	spin_lock_irqsave(&kiq->ring_lock, flags);
4243 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4244 		pr_err("critical bug! too many kiq readers\n");
4245 		goto failed_unlock;
4246 	}
4247 	amdgpu_ring_alloc(ring, 32);
4248 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4249 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4250 				(5 << 8) |	/* dst: memory */
4251 				(1 << 16) |	/* count sel */
4252 				(1 << 20));	/* write confirm */
4253 	amdgpu_ring_write(ring, 0);
4254 	amdgpu_ring_write(ring, 0);
4255 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4256 				reg_val_offs * 4));
4257 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4258 				reg_val_offs * 4));
4259 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4260 	if (r)
4261 		goto failed_undo;
4262 
4263 	amdgpu_ring_commit(ring);
4264 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4265 
4266 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4267 
4268 	/* Don't keep waiting in the GPU reset case, because that may
4269 	 * block the gpu_recover() routine forever: e.g. this virt_kiq_rreg
4270 	 * can be triggered from TTM, and ttm_bo_lock_delayed_workqueue()
4271 	 * will never return if we keep waiting here, which makes
4272 	 * gpu_recover() hang.
4273 	 *
4274 	 * Also don't keep waiting when called from IRQ context.
4275 	 */
4276 	if (r < 1 && (amdgpu_in_reset(adev)))
4277 		goto failed_kiq_read;
4278 
4279 	might_sleep();
4280 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4281 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4282 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4283 	}
4284 
4285 	if (cnt > MAX_KIQ_REG_TRY)
4286 		goto failed_kiq_read;
4287 
4288 	mb();
4289 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4290 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4291 	amdgpu_device_wb_free(adev, reg_val_offs);
4292 	return value;
4293 
4294 failed_undo:
4295 	amdgpu_ring_undo(ring);
4296 failed_unlock:
4297 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4298 failed_kiq_read:
4299 	if (reg_val_offs)
4300 		amdgpu_device_wb_free(adev, reg_val_offs);
4301 	pr_err("failed to read gpu clock\n");
4302 	return ~0;
4303 }
4304 
4305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4306 {
4307 	uint64_t clock, clock_lo, clock_hi, hi_check;
4308 
4309 	switch (adev->ip_versions[GC_HWIP][0]) {
4310 	case IP_VERSION(9, 3, 0):
4311 		preempt_disable();
4312 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4313 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4314 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4315 		/* The SMUIO TSC clock frequency is 100MHz, so the 32-bit lower
4316 		 * half wraps around roughly every 42 seconds.
4317 		 */
4318 		if (hi_check != clock_hi) {
4319 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4320 			clock_hi = hi_check;
4321 		}
4322 		preempt_enable();
4323 		clock = clock_lo | (clock_hi << 32ULL);
4324 		break;
4325 	default:
4326 		amdgpu_gfx_off_ctrl(adev, false);
4327 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4328 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4329 			clock = gfx_v9_0_kiq_read_clock(adev);
4330 		} else {
4331 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4332 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4333 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4334 		}
4335 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4336 		amdgpu_gfx_off_ctrl(adev, true);
4337 		break;
4338 	}
4339 	return clock;
4340 }
4341 
4342 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4343 					  uint32_t vmid,
4344 					  uint32_t gds_base, uint32_t gds_size,
4345 					  uint32_t gws_base, uint32_t gws_size,
4346 					  uint32_t oa_base, uint32_t oa_size)
4347 {
4348 	struct amdgpu_device *adev = ring->adev;
4349 
4350 	/* GDS Base */
4351 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4352 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4353 				   gds_base);
4354 
4355 	/* GDS Size */
4356 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4357 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4358 				   gds_size);
4359 
4360 	/* GWS */
4361 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4362 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4363 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4364 
4365 	/* OA */
4366 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4367 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4368 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4369 }
4370 
4371 static const u32 vgpr_init_compute_shader[] =
4372 {
4373 	0xb07c0000, 0xbe8000ff,
4374 	0x000000f8, 0xbf110800,
4375 	0x7e000280, 0x7e020280,
4376 	0x7e040280, 0x7e060280,
4377 	0x7e080280, 0x7e0a0280,
4378 	0x7e0c0280, 0x7e0e0280,
4379 	0x80808800, 0xbe803200,
4380 	0xbf84fff5, 0xbf9c0000,
4381 	0xd28c0001, 0x0001007f,
4382 	0xd28d0001, 0x0002027e,
4383 	0x10020288, 0xb8810904,
4384 	0xb7814000, 0xd1196a01,
4385 	0x00000301, 0xbe800087,
4386 	0xbefc00c1, 0xd89c4000,
4387 	0x00020201, 0xd89cc080,
4388 	0x00040401, 0x320202ff,
4389 	0x00000800, 0x80808100,
4390 	0xbf84fff8, 0x7e020280,
4391 	0xbf810000, 0x00000000,
4392 };
4393 
4394 static const u32 sgpr_init_compute_shader[] =
4395 {
4396 	0xb07c0000, 0xbe8000ff,
4397 	0x0000005f, 0xbee50080,
4398 	0xbe812c65, 0xbe822c65,
4399 	0xbe832c65, 0xbe842c65,
4400 	0xbe852c65, 0xb77c0005,
4401 	0x80808500, 0xbf84fff8,
4402 	0xbe800080, 0xbf810000,
4403 };
4404 
4405 static const u32 vgpr_init_compute_shader_arcturus[] = {
4406 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4407 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4408 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4409 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4410 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4411 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4412 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4413 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4414 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4415 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4416 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4417 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4418 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4419 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4420 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4421 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4422 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4423 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4424 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4425 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4426 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4427 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4428 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4429 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4430 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4431 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4432 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4433 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4434 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4435 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4436 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4437 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4438 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4439 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4440 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4441 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4442 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4443 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4444 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4445 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4446 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4447 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4448 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4449 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4450 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4451 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4452 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4453 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4454 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4455 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4456 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4457 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4458 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4459 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4460 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4461 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4462 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4463 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4464 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4465 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4466 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4467 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4468 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4469 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4470 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4471 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4472 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4473 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4474 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4475 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4476 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4477 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4478 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4479 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4480 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4481 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4482 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4483 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4484 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4485 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4486 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4487 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4488 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4489 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4490 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4491 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4492 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4493 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4494 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4495 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4496 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4497 	0xbf84fff8, 0xbf810000,
4498 };
4499 
4500 /* When the register arrays below are changed, please update gpr_reg_size
4501    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so that
4502    all gfx9 ASICs remain covered */
4503 static const struct soc15_reg_entry vgpr_init_regs[] = {
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4509    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4510    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4511    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4515    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4516    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4517    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4518 };
4519 
4520 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4523    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4524    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4525    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4526    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4527    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4528    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4529    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4530    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4531    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4532    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4533    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4534    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4535 };
4536 
4537 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4538    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4539    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4540    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4541    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4542    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4543    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4544    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4545    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4546    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4547    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4548    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4549    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4550    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4551    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4552 };
4553 
4554 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4555    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4556    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4557    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4558    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4559    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4560    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4561    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4562    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4563    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4564    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4565    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4566    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4567    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4568    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4569 };
4570 
4571 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4572    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4573    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4574    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4575    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4576    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4577    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4578    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4579    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4580    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4581    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4582    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4583    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4584    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4585    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4586    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4587    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4588    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4589    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4590    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4591    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4592    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4593    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4594    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4595    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4596    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4597    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4598    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4599    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4600    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4601    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4602    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4603    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4604    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4605 };
4606 
4607 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4608 {
4609 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4610 	int i, r;
4611 
4612 	/* only support when RAS is enabled */
4613 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4614 		return 0;
4615 
4616 	r = amdgpu_ring_alloc(ring, 7);
4617 	if (r) {
4618 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4619 			ring->name, r);
4620 		return r;
4621 	}
4622 
4623 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4624 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4625 
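	/* clear the whole GDS aperture with a CP DMA fill (dst_sel 1 = GDS, src_sel 2 = data) */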
4626 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4627 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4628 				PACKET3_DMA_DATA_DST_SEL(1) |
4629 				PACKET3_DMA_DATA_SRC_SEL(2) |
4630 				PACKET3_DMA_DATA_ENGINE(0)));
4631 	amdgpu_ring_write(ring, 0);
4632 	amdgpu_ring_write(ring, 0);
4633 	amdgpu_ring_write(ring, 0);
4634 	amdgpu_ring_write(ring, 0);
4635 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4636 				adev->gds.gds_size);
4637 
4638 	amdgpu_ring_commit(ring);
4639 
4640 	for (i = 0; i < adev->usec_timeout; i++) {
4641 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4642 			break;
4643 		udelay(1);
4644 	}
4645 
4646 	if (i >= adev->usec_timeout)
4647 		r = -ETIMEDOUT;
4648 
4649 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4650 
4651 	return r;
4652 }
4653 
4654 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4655 {
4656 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4657 	struct amdgpu_ib ib;
4658 	struct dma_fence *f = NULL;
4659 	int r, i;
4660 	unsigned total_size, vgpr_offset, sgpr_offset;
4661 	u64 gpu_addr;
4662 
4663 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4664 						adev->gfx.config.max_cu_per_sh *
4665 						adev->gfx.config.max_sh_per_se;
4666 	int sgpr_work_group_size = 5;
4667 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4668 	int vgpr_init_shader_size;
4669 	const u32 *vgpr_init_shader_ptr;
4670 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4671 
4672 	/* only support when RAS is enabled */
4673 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4674 		return 0;
4675 
4676 	/* bail if the compute ring is not ready */
4677 	if (!ring->sched.ready)
4678 		return 0;
4679 
4680 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4681 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4682 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4683 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4684 	} else {
4685 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4686 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4687 		vgpr_init_regs_ptr = vgpr_init_regs;
4688 	}
4689 
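	/* per dispatch: 3 dwords per SET_SH_REG register write, plus 4 for
	 * COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and 2 for the CS partial
	 * flush, all converted to bytes below
	 */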
4690 	total_size =
4691 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4692 	total_size +=
4693 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4694 	total_size +=
4695 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4696 	total_size = ALIGN(total_size, 256);
4697 	vgpr_offset = total_size;
4698 	total_size += ALIGN(vgpr_init_shader_size, 256);
4699 	sgpr_offset = total_size;
4700 	total_size += sizeof(sgpr_init_compute_shader);
4701 
4702 	/* allocate an indirect buffer to put the commands in */
4703 	memset(&ib, 0, sizeof(ib));
4704 	r = amdgpu_ib_get(adev, NULL, total_size,
4705 					AMDGPU_IB_POOL_DIRECT, &ib);
4706 	if (r) {
4707 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4708 		return r;
4709 	}
4710 
4711 	/* load the compute shaders */
4712 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4713 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4714 
4715 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4716 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4717 
4718 	/* init the ib length to 0 */
4719 	ib.length_dw = 0;
4720 
4721 	/* VGPR */
4722 	/* write the register state for the compute dispatch */
4723 	for (i = 0; i < gpr_reg_size; i++) {
4724 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4725 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4726 								- PACKET3_SET_SH_REG_START;
4727 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4728 	}
4729 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4730 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4731 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4732 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4733 							- PACKET3_SET_SH_REG_START;
4734 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4735 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4736 
4737 	/* write dispatch packet */
4738 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4739 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4740 	ib.ptr[ib.length_dw++] = 1; /* y */
4741 	ib.ptr[ib.length_dw++] = 1; /* z */
4742 	ib.ptr[ib.length_dw++] =
4743 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4744 
4745 	/* write CS partial flush packet */
4746 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4747 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4748 
4749 	/* SGPR1 */
4750 	/* write the register state for the compute dispatch */
4751 	for (i = 0; i < gpr_reg_size; i++) {
4752 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4753 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4754 								- PACKET3_SET_SH_REG_START;
4755 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4756 	}
4757 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4758 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4759 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4760 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4761 							- PACKET3_SET_SH_REG_START;
4762 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4763 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4764 
4765 	/* write dispatch packet */
4766 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4767 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4768 	ib.ptr[ib.length_dw++] = 1; /* y */
4769 	ib.ptr[ib.length_dw++] = 1; /* z */
4770 	ib.ptr[ib.length_dw++] =
4771 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4772 
4773 	/* write CS partial flush packet */
4774 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4775 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4776 
4777 	/* SGPR2 */
4778 	/* write the register state for the compute dispatch */
4779 	for (i = 0; i < gpr_reg_size; i++) {
4780 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4781 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4782 								- PACKET3_SET_SH_REG_START;
4783 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4784 	}
4785 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4786 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4787 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4788 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4789 							- PACKET3_SET_SH_REG_START;
4790 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4791 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4792 
4793 	/* write dispatch packet */
4794 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4795 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4796 	ib.ptr[ib.length_dw++] = 1; /* y */
4797 	ib.ptr[ib.length_dw++] = 1; /* z */
4798 	ib.ptr[ib.length_dw++] =
4799 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4800 
4801 	/* write CS partial flush packet */
4802 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4803 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4804 
4805 	/* schedule the ib on the ring */
4806 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4807 	if (r) {
4808 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4809 		goto fail;
4810 	}
4811 
4812 	/* wait for the GPU to finish processing the IB */
4813 	r = dma_fence_wait(f, false);
4814 	if (r) {
4815 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4816 		goto fail;
4817 	}
4818 
4819 fail:
4820 	amdgpu_ib_free(adev, &ib, NULL);
4821 	dma_fence_put(f);
4822 
4823 	return r;
4824 }
4825 
4826 static int gfx_v9_0_early_init(void *handle)
4827 {
4828 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4829 
4830 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4831 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4832 		adev->gfx.num_gfx_rings = 0;
4833 	else
4834 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4835 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4836 					  AMDGPU_MAX_COMPUTE_RINGS);
4837 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4838 	gfx_v9_0_set_ring_funcs(adev);
4839 	gfx_v9_0_set_irq_funcs(adev);
4840 	gfx_v9_0_set_gds_init(adev);
4841 	gfx_v9_0_set_rlc_funcs(adev);
4842 
4843 	return 0;
4844 }
4845 
4846 static int gfx_v9_0_ecc_late_init(void *handle)
4847 {
4848 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4849 	int r;
4850 
4851 	/*
4852 	 * Temporary workaround: on several cards the CP firmware fails to
4853 	 * update the read pointer while CPDMA is writing the GDS clearing
4854 	 * operation during the suspend/resume sequence. So limit this
4855 	 * operation to the cold boot sequence for now.
4856 	 */
4857 	if ((!adev->in_suspend) &&
4858 	    (adev->gds.gds_size)) {
4859 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4860 		if (r)
4861 			return r;
4862 	}
4863 
4864 	/* requires IBs so do in late init after IB pool is initialized */
4865 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4866 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4867 	else
4868 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4869 
4870 	if (r)
4871 		return r;
4872 
4873 	if (adev->gfx.ras_funcs &&
4874 	    adev->gfx.ras_funcs->ras_late_init) {
4875 		r = adev->gfx.ras_funcs->ras_late_init(adev);
4876 		if (r)
4877 			return r;
4878 	}
4879 
4880 	if (adev->gfx.ras_funcs &&
4881 	    adev->gfx.ras_funcs->enable_watchdog_timer)
4882 		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4883 
4884 	return 0;
4885 }
4886 
4887 static int gfx_v9_0_late_init(void *handle)
4888 {
4889 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4890 	int r;
4891 
4892 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4893 	if (r)
4894 		return r;
4895 
4896 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4897 	if (r)
4898 		return r;
4899 
4900 	r = gfx_v9_0_ecc_late_init(handle);
4901 	if (r)
4902 		return r;
4903 
4904 	return 0;
4905 }
4906 
4907 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4908 {
4909 	uint32_t rlc_setting;
4910 
4911 	/* if RLC is not enabled, do nothing */
4912 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4913 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4914 		return false;
4915 
4916 	return true;
4917 }
4918 
4919 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4920 {
4921 	uint32_t data;
4922 	unsigned i;
4923 
4924 	data = RLC_SAFE_MODE__CMD_MASK;
4925 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4926 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4927 
4928 	/* wait for RLC_SAFE_MODE */
4929 	for (i = 0; i < adev->usec_timeout; i++) {
4930 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4931 			break;
4932 		udelay(1);
4933 	}
4934 }
4935 
4936 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4937 {
4938 	uint32_t data;
4939 
4940 	data = RLC_SAFE_MODE__CMD_MASK;
4941 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4942 }
4943 
4944 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4945 						bool enable)
4946 {
4947 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4948 
4949 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4950 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4951 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4952 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4953 	} else {
4954 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4955 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4956 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4957 	}
4958 
4959 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4960 }
4961 
4962 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4963 						bool enable)
4964 {
4965 	/* TODO: double check if we need to perform under safe mode */
4966 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4967 
4968 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4969 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4970 	else
4971 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4972 
4973 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4974 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4975 	else
4976 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4977 
4978 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4979 }
4980 
4981 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4982 						      bool enable)
4983 {
4984 	uint32_t data, def;
4985 
4986 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4987 
4988 	/* It is disabled by HW by default */
4989 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4990 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4991 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4992 
4993 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4994 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4995 
4996 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4997 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4998 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4999 
5000 		/* only for Vega10 & Raven1 */
5001 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
5002 
5003 		if (def != data)
5004 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5005 
5006 		/* MGLS is a global flag to control all MGLS in GFX */
5007 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5008 			/* 2 - RLC memory Light sleep */
5009 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5010 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5011 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5012 				if (def != data)
5013 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5014 			}
5015 			/* 3 - CP memory Light sleep */
5016 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5017 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5018 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5019 				if (def != data)
5020 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5021 			}
5022 		}
5023 	} else {
5024 		/* 1 - MGCG_OVERRIDE */
5025 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5026 
5027 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
5028 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5029 
5030 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5031 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5032 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5033 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5034 
5035 		if (def != data)
5036 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5037 
5038 		/* 2 - disable MGLS in RLC */
5039 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5040 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5041 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5042 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5043 		}
5044 
5045 		/* 3 - disable MGLS in CP */
5046 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5047 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5048 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5049 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5050 		}
5051 	}
5052 
5053 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5054 }
5055 
5056 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5057 					   bool enable)
5058 {
5059 	uint32_t data, def;
5060 
5061 	if (!adev->gfx.num_gfx_rings)
5062 		return;
5063 
5064 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5065 
5066 	/* Enable 3D CGCG/CGLS */
5067 	if (enable) {
5068 		/* write cmd to clear cgcg/cgls ov */
5069 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5070 		/* unset CGCG override */
5071 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5072 		/* update CGCG and CGLS override bits */
5073 		if (def != data)
5074 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5075 
5076 		/* enable 3Dcgcg FSM(0x0000363f) */
5077 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5078 
5079 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5080 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5081 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5082 		else
5083 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5084 
5085 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5086 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5087 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5088 		if (def != data)
5089 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5090 
5091 		/* set IDLE_POLL_COUNT(0x00900100) */
5092 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5093 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5094 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5095 		if (def != data)
5096 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5097 	} else {
5098 		/* Disable CGCG/CGLS */
5099 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5100 		/* disable cgcg, cgls should be disabled */
5101 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5102 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5103 		/* disable cgcg and cgls in FSM */
5104 		if (def != data)
5105 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5106 	}
5107 
5108 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5109 }
5110 
5111 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5112 						      bool enable)
5113 {
5114 	uint32_t def, data;
5115 
5116 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5117 
5118 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5119 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5120 		/* unset CGCG override */
5121 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5122 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5123 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5124 		else
5125 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5126 		/* update CGCG and CGLS override bits */
5127 		if (def != data)
5128 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5129 
5130 		/* enable cgcg FSM(0x0000363F) */
5131 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5132 
5133 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5134 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5135 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5136 		else
5137 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5138 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5139 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5140 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5141 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5142 		if (def != data)
5143 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5144 
5145 		/* set IDLE_POLL_COUNT(0x00900100) */
5146 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5147 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5148 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5149 		if (def != data)
5150 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5151 	} else {
5152 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5153 		/* reset CGCG/CGLS bits */
5154 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5155 		/* disable cgcg and cgls in FSM */
5156 		if (def != data)
5157 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5158 	}
5159 
5160 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5161 }
5162 
5163 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5164 					    bool enable)
5165 {
5166 	if (enable) {
5167 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5168 		 * ===  MGCG + MGLS ===
5169 		 */
5170 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5171 		/* ===  CGCG /CGLS for GFX 3D Only === */
5172 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5173 		/* ===  CGCG + CGLS === */
5174 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5175 	} else {
5176 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5177 		 * ===  CGCG + CGLS ===
5178 		 */
5179 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5180 		/* ===  CGCG /CGLS for GFX 3D Only === */
5181 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5182 		/* ===  MGCG + MGLS === */
5183 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5184 	}
5185 	return 0;
5186 }
5187 
5188 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5189 {
5190 	u32 reg, data;
5191 
5192 	amdgpu_gfx_off_ctrl(adev, false);
5193 
5194 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5195 	if (amdgpu_sriov_is_pp_one_vf(adev))
5196 		data = RREG32_NO_KIQ(reg);
5197 	else
5198 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5199 
5200 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5201 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5202 
5203 	if (amdgpu_sriov_is_pp_one_vf(adev))
5204 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5205 	else
5206 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5207 
5208 	amdgpu_gfx_off_ctrl(adev, true);
5209 }
5210 
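/* return true if @offset matches one of the RLCG-controlled register offsets in @entries */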
5211 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5212 					uint32_t offset,
5213 					struct soc15_reg_rlcg *entries, int arr_size)
5214 {
5215 	int i;
5216 	uint32_t reg;
5217 
5218 	if (!entries)
5219 		return false;
5220 
5221 	for (i = 0; i < arr_size; i++) {
5222 		const struct soc15_reg_rlcg *entry;
5223 
5224 		entry = &entries[i];
5225 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5226 		if (offset == reg)
5227 			return true;
5228 	}
5229 
5230 	return false;
5231 }
5232 
5233 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5234 {
5235 	return gfx_v9_0_check_rlcg_range(adev, offset,
5236 					(void *)rlcg_access_gc_9_0,
5237 					ARRAY_SIZE(rlcg_access_gc_9_0));
5238 }
5239 
5240 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5241 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5242 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5243 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5244 	.init = gfx_v9_0_rlc_init,
5245 	.get_csb_size = gfx_v9_0_get_csb_size,
5246 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5247 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5248 	.resume = gfx_v9_0_rlc_resume,
5249 	.stop = gfx_v9_0_rlc_stop,
5250 	.reset = gfx_v9_0_rlc_reset,
5251 	.start = gfx_v9_0_rlc_start,
5252 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5253 	.sriov_wreg = gfx_v9_0_sriov_wreg,
5254 	.sriov_rreg = gfx_v9_0_sriov_rreg,
5255 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5256 };
5257 
5258 static int gfx_v9_0_set_powergating_state(void *handle,
5259 					  enum amd_powergating_state state)
5260 {
5261 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5262 	bool enable = (state == AMD_PG_STATE_GATE);
5263 
5264 	switch (adev->ip_versions[GC_HWIP][0]) {
5265 	case IP_VERSION(9, 2, 2):
5266 	case IP_VERSION(9, 1, 0):
5267 	case IP_VERSION(9, 3, 0):
5268 		if (!enable)
5269 			amdgpu_gfx_off_ctrl(adev, false);
5270 
5271 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5272 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5273 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5274 		} else {
5275 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5276 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5277 		}
5278 
5279 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5280 			gfx_v9_0_enable_cp_power_gating(adev, true);
5281 		else
5282 			gfx_v9_0_enable_cp_power_gating(adev, false);
5283 
5284 		/* update gfx cgpg state */
5285 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5286 
5287 		/* update mgcg state */
5288 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5289 
5290 		if (enable)
5291 			amdgpu_gfx_off_ctrl(adev, true);
5292 		break;
5293 	case IP_VERSION(9, 2, 1):
5294 		amdgpu_gfx_off_ctrl(adev, enable);
5295 		break;
5296 	default:
5297 		break;
5298 	}
5299 
5300 	return 0;
5301 }
5302 
5303 static int gfx_v9_0_set_clockgating_state(void *handle,
5304 					  enum amd_clockgating_state state)
5305 {
5306 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5307 
5308 	if (amdgpu_sriov_vf(adev))
5309 		return 0;
5310 
5311 	switch (adev->ip_versions[GC_HWIP][0]) {
5312 	case IP_VERSION(9, 0, 1):
5313 	case IP_VERSION(9, 2, 1):
5314 	case IP_VERSION(9, 4, 0):
5315 	case IP_VERSION(9, 2, 2):
5316 	case IP_VERSION(9, 1, 0):
5317 	case IP_VERSION(9, 4, 1):
5318 	case IP_VERSION(9, 3, 0):
5319 	case IP_VERSION(9, 4, 2):
5320 		gfx_v9_0_update_gfx_clock_gating(adev,
5321 						 state == AMD_CG_STATE_GATE);
5322 		break;
5323 	default:
5324 		break;
5325 	}
5326 	return 0;
5327 }
5328 
5329 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5330 {
5331 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5332 	int data;
5333 
5334 	if (amdgpu_sriov_vf(adev))
5335 		*flags = 0;
5336 
5337 	/* AMD_CG_SUPPORT_GFX_MGCG */
5338 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5339 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5340 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5341 
5342 	/* AMD_CG_SUPPORT_GFX_CGCG */
5343 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5344 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5345 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5346 
5347 	/* AMD_CG_SUPPORT_GFX_CGLS */
5348 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5349 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5350 
5351 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5352 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5353 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5354 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5355 
5356 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5357 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5358 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5359 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5360 
5361 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5362 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5363 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5364 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5365 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5366 
5367 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5368 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5369 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5370 	}
5371 }
5372 
5373 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5374 {
5375 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5376 }
5377 
5378 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5379 {
5380 	struct amdgpu_device *adev = ring->adev;
5381 	u64 wptr;
5382 
5383 	/* XXX check if swapping is necessary on BE */
5384 	if (ring->use_doorbell) {
5385 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5386 	} else {
5387 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5388 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5389 	}
5390 
5391 	return wptr;
5392 }
5393 
5394 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5395 {
5396 	struct amdgpu_device *adev = ring->adev;
5397 
5398 	if (ring->use_doorbell) {
5399 		/* XXX check if swapping is necessary on BE */
5400 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5401 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5402 	} else {
5403 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5404 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5405 	}
5406 }
5407 
5408 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5409 {
5410 	struct amdgpu_device *adev = ring->adev;
5411 	u32 ref_and_mask, reg_mem_engine;
5412 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5413 
5414 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5415 		switch (ring->me) {
5416 		case 1:
5417 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5418 			break;
5419 		case 2:
5420 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5421 			break;
5422 		default:
5423 			return;
5424 		}
5425 		reg_mem_engine = 0;
5426 	} else {
5427 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5428 		reg_mem_engine = 1; /* pfp */
5429 	}
5430 
5431 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5432 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5433 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5434 			      ref_and_mask, ref_and_mask, 0x20);
5435 }
5436 
5437 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5438 					struct amdgpu_job *job,
5439 					struct amdgpu_ib *ib,
5440 					uint32_t flags)
5441 {
5442 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5443 	u32 header, control = 0;
5444 
5445 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5446 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5447 	else
5448 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5449 
5450 	control |= ib->length_dw | (vmid << 24);
5451 
5452 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5453 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5454 
5455 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5456 			gfx_v9_0_ring_emit_de_meta(ring);
5457 	}
5458 
5459 	amdgpu_ring_write(ring, header);
5460 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5461 	amdgpu_ring_write(ring,
5462 #ifdef __BIG_ENDIAN
5463 		(2 << 0) |
5464 #endif
5465 		lower_32_bits(ib->gpu_addr));
5466 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5467 	amdgpu_ring_write(ring, control);
5468 }
5469 
5470 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5471 					  struct amdgpu_job *job,
5472 					  struct amdgpu_ib *ib,
5473 					  uint32_t flags)
5474 {
5475 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5476 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5477 
5478 	/* Currently there is a high probability of a wave ID mismatch
5479 	 * between ME and GDS, leading to a HW deadlock, because ME generates
5480 	 * different wave IDs than the GDS expects. This situation happens
5481 	 * randomly when at least 5 compute pipes use GDS ordered append.
5482 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5483 	 * Those are probably bugs somewhere else in the kernel driver.
5484 	 *
5485 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5486 	 * GDS to 0 for this ring (me/pipe).
5487 	 */
5488 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5489 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5490 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5491 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5492 	}
5493 
5494 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5495 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5496 	amdgpu_ring_write(ring,
5497 #ifdef __BIG_ENDIAN
5498 				(2 << 0) |
5499 #endif
5500 				lower_32_bits(ib->gpu_addr));
5501 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5502 	amdgpu_ring_write(ring, control);
5503 }
5504 
5505 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5506 				     u64 seq, unsigned flags)
5507 {
5508 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5509 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5510 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5511 
5512 	/* RELEASE_MEM - flush caches, send int */
5513 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5514 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5515 					       EOP_TC_NC_ACTION_EN) :
5516 					      (EOP_TCL1_ACTION_EN |
5517 					       EOP_TC_ACTION_EN |
5518 					       EOP_TC_WB_ACTION_EN |
5519 					       EOP_TC_MD_ACTION_EN)) |
5520 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5521 				 EVENT_INDEX(5)));
5522 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5523 
5524 	/*
5525 	 * The address must be Qword aligned for a 64-bit write, or Dword
5526 	 * aligned when only the low 32 bits of data are sent (high bits discarded).
5527 	 */
5528 	if (write64bit)
5529 		BUG_ON(addr & 0x7);
5530 	else
5531 		BUG_ON(addr & 0x3);
5532 	amdgpu_ring_write(ring, lower_32_bits(addr));
5533 	amdgpu_ring_write(ring, upper_32_bits(addr));
5534 	amdgpu_ring_write(ring, lower_32_bits(seq));
5535 	amdgpu_ring_write(ring, upper_32_bits(seq));
5536 	amdgpu_ring_write(ring, 0);
5537 }
5538 
5539 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5540 {
5541 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5542 	uint32_t seq = ring->fence_drv.sync_seq;
5543 	uint64_t addr = ring->fence_drv.gpu_addr;
5544 
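	/* poll the fence write-back memory until the last synced sequence number signals */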
5545 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5546 			      lower_32_bits(addr), upper_32_bits(addr),
5547 			      seq, 0xffffffff, 4);
5548 }
5549 
5550 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5551 					unsigned vmid, uint64_t pd_addr)
5552 {
5553 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5554 
5555 	/* compute doesn't have PFP */
5556 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5557 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5558 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5559 		amdgpu_ring_write(ring, 0x0);
5560 	}
5561 }
5562 
5563 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5564 {
5565 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5566 }
5567 
5568 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5569 {
5570 	u64 wptr;
5571 
5572 	/* XXX check if swapping is necessary on BE */
5573 	if (ring->use_doorbell)
5574 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5575 	else
5576 		BUG();
5577 	return wptr;
5578 }
5579 
5580 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5581 {
5582 	struct amdgpu_device *adev = ring->adev;
5583 
5584 	/* XXX check if swapping is necessary on BE */
5585 	if (ring->use_doorbell) {
5586 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5587 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5588 	} else {
5589 		BUG(); /* only DOORBELL method supported on gfx9 now */
5590 	}
5591 }
5592 
5593 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5594 					 u64 seq, unsigned int flags)
5595 {
5596 	struct amdgpu_device *adev = ring->adev;
5597 
5598 	/* we only allocate 32bit for each seq wb address */
5599 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5600 
5601 	/* write fence seq to the "addr" */
5602 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5603 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5604 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5605 	amdgpu_ring_write(ring, lower_32_bits(addr));
5606 	amdgpu_ring_write(ring, upper_32_bits(addr));
5607 	amdgpu_ring_write(ring, lower_32_bits(seq));
5608 
5609 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5610 		/* set register to trigger INT */
5611 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5612 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5613 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5614 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5615 		amdgpu_ring_write(ring, 0);
5616 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5617 	}
5618 }
5619 
5620 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5621 {
5622 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5623 	amdgpu_ring_write(ring, 0);
5624 }
5625 
5626 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5627 {
5628 	struct v9_ce_ib_state ce_payload = {0};
5629 	uint64_t csa_addr;
5630 	int cnt;
5631 
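	/* WRITE_DATA body is control + addr_lo + addr_hi + the CE payload;
	 * the PACKET3 count field excludes one dword, hence the + 4 - 2
	 */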
5632 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5633 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5634 
5635 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5636 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5637 				 WRITE_DATA_DST_SEL(8) |
5638 				 WR_CONFIRM) |
5639 				 WRITE_DATA_CACHE_POLICY(0));
5640 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5641 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5642 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5643 }
5644 
5645 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5646 {
5647 	struct v9_de_ib_state de_payload = {0};
5648 	uint64_t csa_addr, gds_addr;
5649 	int cnt;
5650 
5651 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5652 	gds_addr = csa_addr + 4096;
5653 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5654 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5655 
5656 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5657 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5658 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5659 				 WRITE_DATA_DST_SEL(8) |
5660 				 WR_CONFIRM) |
5661 				 WRITE_DATA_CACHE_POLICY(0));
5662 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5663 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5664 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5665 }
5666 
5667 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5668 				   bool secure)
5669 {
5670 	uint32_t v = secure ? FRAME_TMZ : 0;
5671 
5672 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5673 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5674 }
5675 
5676 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5677 {
5678 	uint32_t dw2 = 0;
5679 
5680 	if (amdgpu_sriov_vf(ring->adev))
5681 		gfx_v9_0_ring_emit_ce_meta(ring);
5682 
5683 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5684 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5685 		/* set load_global_config & load_global_uconfig */
5686 		dw2 |= 0x8001;
5687 		/* set load_cs_sh_regs */
5688 		dw2 |= 0x01000000;
5689 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5690 		dw2 |= 0x10002;
5691 
5692 		/* set load_ce_ram if a preamble is present */
5693 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5694 			dw2 |= 0x10000000;
5695 	} else {
5696 		/* still load_ce_ram if this is the first time a preamble is presented,
5697 		 * even though no context switch happens.
5698 		 */
5699 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5700 			dw2 |= 0x10000000;
5701 	}
5702 
5703 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5704 	amdgpu_ring_write(ring, dw2);
5705 	amdgpu_ring_write(ring, 0);
5706 }
5707 
5708 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5709 {
5710 	unsigned ret;
5711 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5712 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5713 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5714 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5715 	ret = ring->wptr & ring->buf_mask;
5716 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5717 	return ret;
5718 }
5719 
5720 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5721 {
5722 	unsigned cur;
5723 	BUG_ON(offset > ring->buf_mask);
5724 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5725 
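	/* replace the dummy dword with the number of dwords to skip, handling ring wraparound */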
5726 	cur = (ring->wptr & ring->buf_mask) - 1;
5727 	if (likely(cur > offset))
5728 		ring->ring[offset] = cur - offset;
5729 	else
5730 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5731 }
5732 
5733 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5734 				    uint32_t reg_val_offs)
5735 {
5736 	struct amdgpu_device *adev = ring->adev;
5737 
5738 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5739 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5740 				(5 << 8) |	/* dst: memory */
5741 				(1 << 20));	/* write confirm */
5742 	amdgpu_ring_write(ring, reg);
5743 	amdgpu_ring_write(ring, 0);
5744 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5745 				reg_val_offs * 4));
5746 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5747 				reg_val_offs * 4));
5748 }
5749 
5750 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5751 				    uint32_t val)
5752 {
5753 	uint32_t cmd = 0;
5754 
5755 	switch (ring->funcs->type) {
5756 	case AMDGPU_RING_TYPE_GFX:
5757 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5758 		break;
5759 	case AMDGPU_RING_TYPE_KIQ:
5760 		cmd = (1 << 16); /* no inc addr */
5761 		break;
5762 	default:
5763 		cmd = WR_CONFIRM;
5764 		break;
5765 	}
5766 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5767 	amdgpu_ring_write(ring, cmd);
5768 	amdgpu_ring_write(ring, reg);
5769 	amdgpu_ring_write(ring, 0);
5770 	amdgpu_ring_write(ring, val);
5771 }
5772 
5773 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5774 					uint32_t val, uint32_t mask)
5775 {
5776 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5777 }
5778 
5779 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5780 						  uint32_t reg0, uint32_t reg1,
5781 						  uint32_t ref, uint32_t mask)
5782 {
5783 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5784 	struct amdgpu_device *adev = ring->adev;
5785 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5786 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5787 
5788 	if (fw_version_ok)
5789 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5790 				      ref, mask, 0x20);
5791 	else
5792 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5793 							   ref, mask);
5794 }
5795 
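/*
 * Soft recovery: issue an SQ_CMD that only targets waves owned by the given
 * VMID (CHECK_VMID = 1), so the hung job's waves can be killed without a
 * full GPU reset.
 */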
5796 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5797 {
5798 	struct amdgpu_device *adev = ring->adev;
5799 	uint32_t value = 0;
5800 
5801 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5802 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5803 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5804 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5805 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5806 }
5807 
5808 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5809 						 enum amdgpu_interrupt_state state)
5810 {
5811 	switch (state) {
5812 	case AMDGPU_IRQ_STATE_DISABLE:
5813 	case AMDGPU_IRQ_STATE_ENABLE:
5814 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5815 			       TIME_STAMP_INT_ENABLE,
5816 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5817 		break;
5818 	default:
5819 		break;
5820 	}
5821 }
5822 
5823 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5824 						     int me, int pipe,
5825 						     enum amdgpu_interrupt_state state)
5826 {
5827 	u32 mec_int_cntl, mec_int_cntl_reg;
5828 
5829 	/*
5830 	 * amdgpu controls only the first MEC. That's why this function only
5831 	 * handles the setting of interrupts for this specific MEC. All other
5832 	 * pipes' interrupts are set by amdkfd.
5833 	 */
5834 
5835 	if (me == 1) {
5836 		switch (pipe) {
5837 		case 0:
5838 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5839 			break;
5840 		case 1:
5841 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5842 			break;
5843 		case 2:
5844 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5845 			break;
5846 		case 3:
5847 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5848 			break;
5849 		default:
5850 			DRM_DEBUG("invalid pipe %d\n", pipe);
5851 			return;
5852 		}
5853 	} else {
5854 		DRM_DEBUG("invalid me %d\n", me);
5855 		return;
5856 	}
5857 
5858 	switch (state) {
5859 	case AMDGPU_IRQ_STATE_DISABLE:
5860 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5861 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5862 					     TIME_STAMP_INT_ENABLE, 0);
5863 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5864 		break;
5865 	case AMDGPU_IRQ_STATE_ENABLE:
5866 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5867 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5868 					     TIME_STAMP_INT_ENABLE, 1);
5869 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5870 		break;
5871 	default:
5872 		break;
5873 	}
5874 }
5875 
5876 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5877 					     struct amdgpu_irq_src *source,
5878 					     unsigned type,
5879 					     enum amdgpu_interrupt_state state)
5880 {
5881 	switch (state) {
5882 	case AMDGPU_IRQ_STATE_DISABLE:
5883 	case AMDGPU_IRQ_STATE_ENABLE:
5884 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5885 			       PRIV_REG_INT_ENABLE,
5886 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5887 		break;
5888 	default:
5889 		break;
5890 	}
5891 
5892 	return 0;
5893 }
5894 
5895 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5896 					      struct amdgpu_irq_src *source,
5897 					      unsigned type,
5898 					      enum amdgpu_interrupt_state state)
5899 {
5900 	switch (state) {
5901 	case AMDGPU_IRQ_STATE_DISABLE:
5902 	case AMDGPU_IRQ_STATE_ENABLE:
5903 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5904 			       PRIV_INSTR_INT_ENABLE,
5905 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5906 		break;
5907 	default:
5908 		break;
5909 	}
5910 
5911 	return 0;
5912 }
5913 
5914 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5915 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5916 			CP_ECC_ERROR_INT_ENABLE, 1)
5917 
5918 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5919 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5920 			CP_ECC_ERROR_INT_ENABLE, 0)
5921 
5922 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5923 					      struct amdgpu_irq_src *source,
5924 					      unsigned type,
5925 					      enum amdgpu_interrupt_state state)
5926 {
5927 	switch (state) {
5928 	case AMDGPU_IRQ_STATE_DISABLE:
5929 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5930 				CP_ECC_ERROR_INT_ENABLE, 0);
5931 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5932 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5933 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5934 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5935 		break;
5936 
5937 	case AMDGPU_IRQ_STATE_ENABLE:
5938 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5939 				CP_ECC_ERROR_INT_ENABLE, 1);
5940 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5941 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5942 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5943 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5944 		break;
5945 	default:
5946 		break;
5947 	}
5948 
5949 	return 0;
5950 }
5951 
5952 
5953 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5954 					    struct amdgpu_irq_src *src,
5955 					    unsigned type,
5956 					    enum amdgpu_interrupt_state state)
5957 {
5958 	switch (type) {
5959 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5960 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5961 		break;
5962 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5963 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5964 		break;
5965 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5966 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5967 		break;
5968 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5969 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5970 		break;
5971 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5972 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5973 		break;
5974 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5975 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5976 		break;
5977 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5978 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5979 		break;
5980 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5981 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5982 		break;
5983 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5984 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5985 		break;
5986 	default:
5987 		break;
5988 	}
5989 	return 0;
5990 }
5991 
5992 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5993 			    struct amdgpu_irq_src *source,
5994 			    struct amdgpu_iv_entry *entry)
5995 {
5996 	int i;
5997 	u8 me_id, pipe_id, queue_id;
5998 	struct amdgpu_ring *ring;
5999 
6000 	DRM_DEBUG("IH: CP EOP\n");
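	/* decode the interrupt source from ring_id:
	 * bits [3:2] = ME, bits [1:0] = pipe, bits [6:4] = queue
	 */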
6001 	me_id = (entry->ring_id & 0x0c) >> 2;
6002 	pipe_id = (entry->ring_id & 0x03) >> 0;
6003 	queue_id = (entry->ring_id & 0x70) >> 4;
6004 
6005 	switch (me_id) {
6006 	case 0:
6007 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6008 		break;
6009 	case 1:
6010 	case 2:
6011 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6012 			ring = &adev->gfx.compute_ring[i];
6013 			/* Per-queue interrupt is supported for MEC starting from VI.
6014 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6015 			 */
6016 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6017 				amdgpu_fence_process(ring);
6018 		}
6019 		break;
6020 	}
6021 	return 0;
6022 }
6023 
6024 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6025 			   struct amdgpu_iv_entry *entry)
6026 {
6027 	u8 me_id, pipe_id, queue_id;
6028 	struct amdgpu_ring *ring;
6029 	int i;
6030 
6031 	me_id = (entry->ring_id & 0x0c) >> 2;
6032 	pipe_id = (entry->ring_id & 0x03) >> 0;
6033 	queue_id = (entry->ring_id & 0x70) >> 4;
6034 
6035 	switch (me_id) {
6036 	case 0:
6037 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6038 		break;
6039 	case 1:
6040 	case 2:
6041 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6042 			ring = &adev->gfx.compute_ring[i];
6043 			if (ring->me == me_id && ring->pipe == pipe_id &&
6044 			    ring->queue == queue_id)
6045 				drm_sched_fault(&ring->sched);
6046 		}
6047 		break;
6048 	}
6049 }
6050 
6051 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6052 				 struct amdgpu_irq_src *source,
6053 				 struct amdgpu_iv_entry *entry)
6054 {
6055 	DRM_ERROR("Illegal register access in command stream\n");
6056 	gfx_v9_0_fault(adev, entry);
6057 	return 0;
6058 }
6059 
6060 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6061 				  struct amdgpu_irq_src *source,
6062 				  struct amdgpu_iv_entry *entry)
6063 {
6064 	DRM_ERROR("Illegal instruction in command stream\n");
6065 	gfx_v9_0_fault(adev, entry);
6066 	return 0;
6067 }
6068 
6069 
6070 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6071 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6072 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6073 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6074 	},
6075 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6076 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6077 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6078 	},
6079 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6080 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6081 	  0, 0
6082 	},
6083 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6084 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6085 	  0, 0
6086 	},
6087 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6088 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6089 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6090 	},
6091 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6092 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6096 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6097 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6098 	},
6099 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6100 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6101 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6102 	},
6103 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6104 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6105 	  0, 0
6106 	},
6107 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6108 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6109 	  0, 0
6110 	},
6111 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6112 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6113 	  0, 0
6114 	},
6115 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6116 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6117 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6118 	},
6119 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6120 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6121 	  0, 0
6122 	},
6123 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6124 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6125 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6126 	},
6127 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6128 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6129 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6130 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6131 	},
6132 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6133 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6134 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6135 	  0, 0
6136 	},
6137 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6138 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6139 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6140 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6141 	},
6142 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6143 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6144 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6145 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6146 	},
6147 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6148 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6149 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6150 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6151 	},
6152 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6153 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6154 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6155 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6156 	},
6157 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6158 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6159 	  0, 0
6160 	},
6161 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6162 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6163 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6164 	},
6165 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6166 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6167 	  0, 0
6168 	},
6169 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6170 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6171 	  0, 0
6172 	},
6173 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6174 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6175 	  0, 0
6176 	},
6177 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6178 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6179 	  0, 0
6180 	},
6181 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6182 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6183 	  0, 0
6184 	},
6185 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6186 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6187 	  0, 0
6188 	},
6189 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6190 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6191 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6192 	},
6193 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6194 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6195 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6196 	},
6197 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6198 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6199 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6200 	},
6201 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6202 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6203 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6204 	},
6205 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6206 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6207 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6208 	},
6209 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6210 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6211 	  0, 0
6212 	},
6213 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6214 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6215 	  0, 0
6216 	},
6217 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6218 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6219 	  0, 0
6220 	},
6221 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6222 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6223 	  0, 0
6224 	},
6225 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6226 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6227 	  0, 0
6228 	},
6229 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6230 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6231 	  0, 0
6232 	},
6233 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6234 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6235 	  0, 0
6236 	},
6237 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6238 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6239 	  0, 0
6240 	},
6241 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6242 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6243 	  0, 0
6244 	},
6245 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6246 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6247 	  0, 0
6248 	},
6249 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6250 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6251 	  0, 0
6252 	},
6253 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6254 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6255 	  0, 0
6256 	},
6257 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6258 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6259 	  0, 0
6260 	},
6261 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6262 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6263 	  0, 0
6264 	},
6265 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6266 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6267 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6268 	},
6269 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6270 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6271 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6272 	},
6273 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6274 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6275 	  0, 0
6276 	},
6277 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6278 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6279 	  0, 0
6280 	},
6281 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6282 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6283 	  0, 0
6284 	},
6285 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6286 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6287 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6288 	},
6289 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6290 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6291 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6292 	},
6293 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6294 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6295 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6296 	},
6297 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6298 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6299 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6300 	},
6301 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6302 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6303 	  0, 0
6304 	},
6305 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6306 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6307 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6308 	},
6309 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6310 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6311 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6312 	},
6313 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6314 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6315 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6316 	},
6317 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6318 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6319 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6320 	},
6321 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6322 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6323 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6324 	},
6325 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6326 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6327 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6328 	},
6329 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6330 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6331 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6332 	},
6333 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6334 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6335 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6336 	},
6337 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6338 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6339 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6340 	},
6341 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6342 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6343 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6344 	},
6345 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6346 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6347 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6348 	},
6349 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6350 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6351 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6352 	},
6353 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6354 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6355 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6356 	},
6357 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6358 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6359 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6360 	},
6361 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6362 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6363 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6364 	},
6365 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6366 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6367 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6368 	},
6369 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6370 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6371 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6372 	},
6373 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6374 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6375 	  0, 0
6376 	},
6377 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6378 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6379 	  0, 0
6380 	},
6381 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6382 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6383 	  0, 0
6384 	},
6385 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6386 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6387 	  0, 0
6388 	},
6389 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6390 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6391 	  0, 0
6392 	},
6393 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6394 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6395 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6396 	},
6397 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6398 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6399 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6400 	},
6401 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6402 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6403 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6404 	},
6405 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6406 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6407 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6408 	},
6409 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6410 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6411 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6412 	},
6413 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6414 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6415 	  0, 0
6416 	},
6417 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6418 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6419 	  0, 0
6420 	},
6421 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6422 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6423 	  0, 0
6424 	},
6425 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6426 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6427 	  0, 0
6428 	},
6429 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6430 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6431 	  0, 0
6432 	},
6433 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6434 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6435 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6436 	},
6437 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6438 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6439 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6440 	},
6441 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6442 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6443 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6444 	},
6445 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6446 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6447 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6448 	},
6449 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6450 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6451 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6452 	},
6453 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6454 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6455 	  0, 0
6456 	},
6457 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6458 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6459 	  0, 0
6460 	},
6461 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6462 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6463 	  0, 0
6464 	},
6465 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6466 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6467 	  0, 0
6468 	},
6469 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6470 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6471 	  0, 0
6472 	},
6473 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6474 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6475 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6476 	},
6477 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6478 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6479 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6480 	},
6481 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6482 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6483 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6484 	},
6485 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6486 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6487 	  0, 0
6488 	},
6489 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6490 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6491 	  0, 0
6492 	},
6493 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6494 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6495 	  0, 0
6496 	},
6497 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6498 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6499 	  0, 0
6500 	},
6501 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6502 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6503 	  0, 0
6504 	},
6505 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6506 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6507 	  0, 0
6508 	}
6509 };
6510 
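/*
 * Inject a RAS error into a GFX sub-block: validate the requested sub-block
 * and error type against the ras_gfx_subblocks table, then ask the PSP RAS
 * TA to trigger the error.
 */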
6511 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6512 				     void *inject_if)
6513 {
6514 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6515 	int ret;
6516 	struct ta_ras_trigger_error_input block_info = { 0 };
6517 
6518 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6519 		return -EINVAL;
6520 
6521 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6522 		return -EINVAL;
6523 
6524 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6525 		return -EPERM;
6526 
6527 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6528 	      info->head.type)) {
6529 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6530 			ras_gfx_subblocks[info->head.sub_block_index].name,
6531 			info->head.type);
6532 		return -EPERM;
6533 	}
6534 
6535 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6536 	      info->head.type)) {
6537 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6538 			ras_gfx_subblocks[info->head.sub_block_index].name,
6539 			info->head.type);
6540 		return -EPERM;
6541 	}
6542 
6543 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6544 	block_info.sub_block_index =
6545 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6546 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6547 	block_info.address = info->address;
6548 	block_info.value = info->value;
6549 
6550 	mutex_lock(&adev->grbm_idx_mutex);
6551 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6552 	mutex_unlock(&adev->grbm_idx_mutex);
6553 
6554 	return ret;
6555 }
6556 
6557 static const char *vml2_mems[] = {
6558 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6559 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6560 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6561 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6562 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6563 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6564 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6565 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6566 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6567 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6568 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6569 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6570 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6571 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6572 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6573 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6574 };
6575 
6576 static const char *vml2_walker_mems[] = {
6577 	"UTC_VML2_CACHE_PDE0_MEM0",
6578 	"UTC_VML2_CACHE_PDE0_MEM1",
6579 	"UTC_VML2_CACHE_PDE1_MEM0",
6580 	"UTC_VML2_CACHE_PDE1_MEM1",
6581 	"UTC_VML2_CACHE_PDE2_MEM0",
6582 	"UTC_VML2_CACHE_PDE2_MEM1",
6583 	"UTC_VML2_RDIF_LOG_FIFO",
6584 };
6585 
6586 static const char *atc_l2_cache_2m_mems[] = {
6587 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6588 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6589 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6590 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6591 };
6592 
6593 static const char *atc_l2_cache_4k_mems[] = {
6594 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6595 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6596 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6597 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6598 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6599 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6600 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6601 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6602 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6603 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6604 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6605 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6606 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6607 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6608 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6609 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6610 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6611 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6612 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6613 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6614 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6615 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6616 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6617 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6618 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6619 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6620 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6621 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6622 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6623 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6624 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6625 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6626 };
6627 
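/*
 * Query the UTC (VML2, VML2 walker, ATC L2) EDC status: step each memory
 * instance through its INDEX/CNT register pair and accumulate SEC counts as
 * correctable and DED counts as uncorrectable errors in err_data.
 */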
6628 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6629 					 struct ras_err_data *err_data)
6630 {
6631 	uint32_t i, data;
6632 	uint32_t sec_count, ded_count;
6633 
6634 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6635 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6636 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6637 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6638 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6639 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6640 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6641 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6642 
6643 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6644 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6645 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6646 
6647 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6648 		if (sec_count) {
6649 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6650 				"SEC %d\n", i, vml2_mems[i], sec_count);
6651 			err_data->ce_count += sec_count;
6652 		}
6653 
6654 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6655 		if (ded_count) {
6656 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6657 				"DED %d\n", i, vml2_mems[i], ded_count);
6658 			err_data->ue_count += ded_count;
6659 		}
6660 	}
6661 
6662 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6663 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6664 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6665 
6666 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6667 						SEC_COUNT);
6668 		if (sec_count) {
6669 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6670 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6671 			err_data->ce_count += sec_count;
6672 		}
6673 
6674 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6675 						DED_COUNT);
6676 		if (ded_count) {
6677 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6678 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6679 			err_data->ue_count += ded_count;
6680 		}
6681 	}
6682 
6683 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6684 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6685 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6686 
6687 		sec_count = (data & 0x00006000L) >> 0xd;
6688 		if (sec_count) {
6689 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6690 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6691 				sec_count);
6692 			err_data->ce_count += sec_count;
6693 		}
6694 	}
6695 
6696 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6697 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6698 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6699 
6700 		sec_count = (data & 0x00006000L) >> 0xd;
6701 		if (sec_count) {
6702 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6703 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6704 				sec_count);
6705 			err_data->ce_count += sec_count;
6706 		}
6707 
6708 		ded_count = (data & 0x00018000L) >> 0xf;
6709 		if (ded_count) {
6710 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6711 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6712 				ded_count);
6713 			err_data->ue_count += ded_count;
6714 		}
6715 	}
6716 
6717 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6718 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6719 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6720 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6721 
6722 	return 0;
6723 }
6724 
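/*
 * Convert one raw EDC counter value into error counts: for every entry in
 * gfx_v9_0_ras_fields that matches this register, extract the SEC
 * (correctable) and DED (uncorrectable) fields and add them to
 * *sec_count / *ded_count.
 */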
6725 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6726 	const struct soc15_reg_entry *reg,
6727 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6728 	uint32_t *sec_count, uint32_t *ded_count)
6729 {
6730 	uint32_t i;
6731 	uint32_t sec_cnt, ded_cnt;
6732 
6733 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6734 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6735 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6736 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6737 			continue;
6738 
6739 		sec_cnt = (value &
6740 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6741 				gfx_v9_0_ras_fields[i].sec_count_shift;
6742 		if (sec_cnt) {
6743 			dev_info(adev->dev, "GFX SubBlock %s, "
6744 				"Instance[%d][%d], SEC %d\n",
6745 				gfx_v9_0_ras_fields[i].name,
6746 				se_id, inst_id,
6747 				sec_cnt);
6748 			*sec_count += sec_cnt;
6749 		}
6750 
6751 		ded_cnt = (value &
6752 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6753 				gfx_v9_0_ras_fields[i].ded_count_shift;
6754 		if (ded_cnt) {
6755 			dev_info(adev->dev, "GFX SubBlock %s, "
6756 				"Instance[%d][%d], DED %d\n",
6757 				gfx_v9_0_ras_fields[i].name,
6758 				se_id, inst_id,
6759 				ded_cnt);
6760 			*ded_count += ded_cnt;
6761 		}
6762 	}
6763 
6764 	return 0;
6765 }
6766 
6767 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6768 {
6769 	int i, j, k;
6770 
6771 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6772 		return;
6773 
6774 	/* read back registers to clear the counters */
6775 	mutex_lock(&adev->grbm_idx_mutex);
6776 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6777 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6778 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6779 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6780 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6781 			}
6782 		}
6783 	}
6784 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6785 	mutex_unlock(&adev->grbm_idx_mutex);
6786 
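	/* clear the UTC (VML2/ATC L2) EDC counters: zero the count registers
	 * and read back every instance
	 */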
6787 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6788 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6789 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6790 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6791 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6792 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6793 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6794 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6795 
6796 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6797 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6798 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6799 	}
6800 
6801 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6802 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6803 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6804 	}
6805 
6806 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6807 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6808 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6809 	}
6810 
6811 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6812 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6813 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6814 	}
6815 
6816 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6817 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6818 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6819 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6820 }
6821 
6822 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6823 					  void *ras_error_status)
6824 {
6825 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6826 	uint32_t sec_count = 0, ded_count = 0;
6827 	uint32_t i, j, k;
6828 	uint32_t reg_value;
6829 
6830 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6831 		return -EINVAL;
6832 
6833 	err_data->ue_count = 0;
6834 	err_data->ce_count = 0;
6835 
6836 	mutex_lock(&adev->grbm_idx_mutex);
6837 
6838 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6839 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6840 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6841 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6842 				reg_value =
6843 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6844 				if (reg_value)
6845 					gfx_v9_0_ras_error_count(adev,
6846 						&gfx_v9_0_edc_counter_regs[i],
6847 						j, k, reg_value,
6848 						&sec_count, &ded_count);
6849 			}
6850 		}
6851 	}
6852 
6853 	err_data->ce_count += sec_count;
6854 	err_data->ue_count += ded_count;
6855 
6856 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6857 	mutex_unlock(&adev->grbm_idx_mutex);
6858 
6859 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6860 
6861 	return 0;
6862 }
6863 
6864 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6865 {
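	/* flush/invalidate the shader instruction and scalar (constant) caches
	 * and the TC/TCL1 caches over the full address range
	 */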
6866 	const unsigned int cp_coher_cntl =
6867 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6868 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6869 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6870 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6871 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6872 
6873 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6874 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6875 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6876 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6877 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6878 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6879 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6880 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6881 }
6882 
6883 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
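/*
 * Limit (or restore) the wave occupancy of one compute (CS) pipe by writing
 * its SPI_WCL_PIPE_PERCENT_CS register; used together with
 * gfx_v9_0_emit_wave_limit() to favor high priority compute queues.
 */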
6884 					uint32_t pipe, bool enable)
6885 {
6886 	struct amdgpu_device *adev = ring->adev;
6887 	uint32_t val;
6888 	uint32_t wcl_cs_reg;
6889 
6890 	/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6891 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6892 
6893 	switch (pipe) {
6894 	case 0:
6895 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6896 		break;
6897 	case 1:
6898 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6899 		break;
6900 	case 2:
6901 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6902 		break;
6903 	case 3:
6904 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6905 		break;
6906 	default:
6907 		DRM_DEBUG("invalid pipe %d\n", pipe);
6908 		return;
6909 	}
6910 
6911 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6912 
6913 }
6914 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6915 {
6916 	struct amdgpu_device *adev = ring->adev;
6917 	uint32_t val;
6918 	int i;
6919 
6920 
6921 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
6922 	 * the number of gfx waves. Setting it to 0x1f (0x1f / 0x7f) makes sure gfx
6923 	 * only gets around 25% of the gpu resources.
6924 	 */
6925 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6926 	amdgpu_ring_emit_wreg(ring,
6927 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6928 			      val);
6929 
6930 	/* Restrict waves for normal/low priority compute queues as well
6931 	 * to get the best QoS for high priority compute jobs.
6932 	 *
6933 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6934 	 */
6935 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6936 		if (i != ring->pipe)
6937 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6938 
6939 	}
6940 }
6941 
6942 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6943 	.name = "gfx_v9_0",
6944 	.early_init = gfx_v9_0_early_init,
6945 	.late_init = gfx_v9_0_late_init,
6946 	.sw_init = gfx_v9_0_sw_init,
6947 	.sw_fini = gfx_v9_0_sw_fini,
6948 	.hw_init = gfx_v9_0_hw_init,
6949 	.hw_fini = gfx_v9_0_hw_fini,
6950 	.suspend = gfx_v9_0_suspend,
6951 	.resume = gfx_v9_0_resume,
6952 	.is_idle = gfx_v9_0_is_idle,
6953 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6954 	.soft_reset = gfx_v9_0_soft_reset,
6955 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6956 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6957 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6958 };
6959 
6960 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6961 	.type = AMDGPU_RING_TYPE_GFX,
6962 	.align_mask = 0xff,
6963 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6964 	.support_64bit_ptrs = true,
6965 	.vmhub = AMDGPU_GFXHUB_0,
6966 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6967 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6968 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6969 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6970 		5 +  /* COND_EXEC */
6971 		7 +  /* PIPELINE_SYNC */
6972 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6973 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6974 		2 + /* VM_FLUSH */
6975 		8 +  /* FENCE for VM_FLUSH */
6976 		20 + /* GDS switch */
6977 		4 + /* double SWITCH_BUFFER,
6978 		       the first COND_EXEC jumps to the place just
6979 		       prior to this double SWITCH_BUFFER */
6980 		5 + /* COND_EXEC */
6981 		7 +	 /*	HDP_flush */
6982 		4 +	 /*	VGT_flush */
6983 		14 + /*	CE_META */
6984 		31 + /*	DE_META */
6985 		3 + /* CNTX_CTRL */
6986 		5 + /* HDP_INVL */
6987 		8 + 8 + /* FENCE x2 */
6988 		2 + /* SWITCH_BUFFER */
6989 		7, /* gfx_v9_0_emit_mem_sync */
6990 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6991 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6992 	.emit_fence = gfx_v9_0_ring_emit_fence,
6993 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6994 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6995 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6996 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6997 	.test_ring = gfx_v9_0_ring_test_ring,
6998 	.test_ib = gfx_v9_0_ring_test_ib,
6999 	.insert_nop = amdgpu_ring_insert_nop,
7000 	.pad_ib = amdgpu_ring_generic_pad_ib,
7001 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7002 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7003 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7004 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
7005 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7006 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7007 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7008 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7009 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7010 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7011 };
7012 
7013 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7014 	.type = AMDGPU_RING_TYPE_COMPUTE,
7015 	.align_mask = 0xff,
7016 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7017 	.support_64bit_ptrs = true,
7018 	.vmhub = AMDGPU_GFXHUB_0,
7019 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7020 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7021 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7022 	.emit_frame_size =
7023 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7024 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7025 		5 + /* hdp invalidate */
7026 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7027 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7028 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7029 		2 + /* gfx_v9_0_ring_emit_vm_flush */
7030 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7031 		7 + /* gfx_v9_0_emit_mem_sync */
7032 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7033 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7034 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7035 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7036 	.emit_fence = gfx_v9_0_ring_emit_fence,
7037 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7038 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7039 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7040 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7041 	.test_ring = gfx_v9_0_ring_test_ring,
7042 	.test_ib = gfx_v9_0_ring_test_ib,
7043 	.insert_nop = amdgpu_ring_insert_nop,
7044 	.pad_ib = amdgpu_ring_generic_pad_ib,
7045 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7046 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7047 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7048 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7049 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7050 };
7051 
7052 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7053 	.type = AMDGPU_RING_TYPE_KIQ,
7054 	.align_mask = 0xff,
7055 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7056 	.support_64bit_ptrs = true,
7057 	.vmhub = AMDGPU_GFXHUB_0,
7058 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7059 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7060 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7061 	.emit_frame_size =
7062 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7063 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7064 		5 + /* hdp invalidate */
7065 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7066 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7067 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7068 		2 + /* gfx_v9_0_ring_emit_vm_flush */
7069 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7070 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7071 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7072 	.test_ring = gfx_v9_0_ring_test_ring,
7073 	.insert_nop = amdgpu_ring_insert_nop,
7074 	.pad_ib = amdgpu_ring_generic_pad_ib,
7075 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7076 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7077 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7078 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7079 };
7080 
7081 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7082 {
7083 	int i;
7084 
7085 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7086 
7087 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7088 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7089 
7090 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7091 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7092 }
7093 
7094 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7095 	.set = gfx_v9_0_set_eop_interrupt_state,
7096 	.process = gfx_v9_0_eop_irq,
7097 };
7098 
7099 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7100 	.set = gfx_v9_0_set_priv_reg_fault_state,
7101 	.process = gfx_v9_0_priv_reg_irq,
7102 };
7103 
7104 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7105 	.set = gfx_v9_0_set_priv_inst_fault_state,
7106 	.process = gfx_v9_0_priv_inst_irq,
7107 };
7108 
7109 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7110 	.set = gfx_v9_0_set_cp_ecc_error_state,
7111 	.process = amdgpu_gfx_cp_ecc_error_irq,
7112 };
7113 
7114 
7115 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7116 {
7117 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7118 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7119 
7120 	adev->gfx.priv_reg_irq.num_types = 1;
7121 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7122 
7123 	adev->gfx.priv_inst_irq.num_types = 1;
7124 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7125 
7126 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7127 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7128 }
7129 
7130 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7131 {
7132 	switch (adev->ip_versions[GC_HWIP][0]) {
7133 	case IP_VERSION(9, 0, 1):
7134 	case IP_VERSION(9, 2, 1):
7135 	case IP_VERSION(9, 4, 0):
7136 	case IP_VERSION(9, 2, 2):
7137 	case IP_VERSION(9, 1, 0):
7138 	case IP_VERSION(9, 4, 1):
7139 	case IP_VERSION(9, 3, 0):
7140 	case IP_VERSION(9, 4, 2):
7141 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7142 		break;
7143 	default:
7144 		break;
7145 	}
7146 }
7147 
7148 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7149 {
7150 	/* init asic gds info */
7151 	switch (adev->ip_versions[GC_HWIP][0]) {
7152 	case IP_VERSION(9, 0, 1):
7153 	case IP_VERSION(9, 2, 1):
7154 	case IP_VERSION(9, 4, 0):
7155 		adev->gds.gds_size = 0x10000;
7156 		break;
7157 	case IP_VERSION(9, 2, 2):
7158 	case IP_VERSION(9, 1, 0):
7159 	case IP_VERSION(9, 4, 1):
7160 		adev->gds.gds_size = 0x1000;
7161 		break;
7162 	case IP_VERSION(9, 4, 2):
7163 		/* aldebaran removed all the GDS internal memory;
7164 		 * only GWS opcodes such as barrier and semaphore
7165 		 * are supported in the kernel */
7166 		adev->gds.gds_size = 0;
7167 		break;
7168 	default:
7169 		adev->gds.gds_size = 0x10000;
7170 		break;
7171 	}
7172 
7173 	switch (adev->ip_versions[GC_HWIP][0]) {
7174 	case IP_VERSION(9, 0, 1):
7175 	case IP_VERSION(9, 4, 0):
7176 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7177 		break;
7178 	case IP_VERSION(9, 2, 1):
7179 		adev->gds.gds_compute_max_wave_id = 0x27f;
7180 		break;
7181 	case IP_VERSION(9, 2, 2):
7182 	case IP_VERSION(9, 1, 0):
7183 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7184 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7185 		else
7186 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7187 		break;
7188 	case IP_VERSION(9, 4, 1):
7189 		adev->gds.gds_compute_max_wave_id = 0xfff;
7190 		break;
7191 	case IP_VERSION(9, 4, 2):
7192 		/* deprecated for Aldebaran, no usage at all */
7193 		adev->gds.gds_compute_max_wave_id = 0;
7194 		break;
7195 	default:
7196 		/* this really depends on the chip */
7197 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7198 		break;
7199 	}
7200 
7201 	adev->gds.gws_size = 64;
7202 	adev->gds.oa_size = 16;
7203 }
7204 
7205 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7206 						 u32 bitmap)
7207 {
7208 	u32 data;
7209 
7210 	if (!bitmap)
7211 		return;
7212 
7213 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7214 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7215 
7216 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7217 }
7218 
7219 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
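/*
 * Return the bitmap of active CUs for the currently selected SE/SH: combine
 * the hardware and user inactive-CU masks, invert them and limit the result
 * to max_cu_per_sh bits.
 */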
7220 {
7221 	u32 data, mask;
7222 
7223 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7224 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7225 
7226 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7227 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7228 
7229 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7230 
7231 	return (~data) & mask;
7232 }
7233 
7234 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7235 				 struct amdgpu_cu_info *cu_info)
7236 {
7237 	int i, j, k, counter, active_cu_number = 0;
7238 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7239 	unsigned disable_masks[4 * 4];
7240 
7241 	if (!adev || !cu_info)
7242 		return -EINVAL;
7243 
7244 	/*
7245 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
7246 	 */
7247 	if (adev->gfx.config.max_shader_engines *
7248 		adev->gfx.config.max_sh_per_se > 16)
7249 		return -EINVAL;
7250 
7251 	amdgpu_gfx_parse_disable_cu(disable_masks,
7252 				    adev->gfx.config.max_shader_engines,
7253 				    adev->gfx.config.max_sh_per_se);
7254 
7255 	mutex_lock(&adev->grbm_idx_mutex);
7256 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7257 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7258 			mask = 1;
7259 			ao_bitmap = 0;
7260 			counter = 0;
7261 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7262 			gfx_v9_0_set_user_cu_inactive_bitmap(
7263 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7264 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7265 
7266 			/*
7267 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
7268 			 * 4x4 array, which is usually suitable for Vega ASICs with a
7269 			 * 4*2 SE/SH layout.
7270 			 * But for Arcturus, the SE/SH layout is changed to 8*1.
7271 			 * To mostly reduce the impact, we make it compatible with the
7272 			 * current bitmap array as below:
7273 			 *    SE4,SH0 --> bitmap[0][1]
7274 			 *    SE5,SH0 --> bitmap[1][1]
7275 			 *    SE6,SH0 --> bitmap[2][1]
7276 			 *    SE7,SH0 --> bitmap[3][1]
7277 			 */
7278 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7279 
7280 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7281 				if (bitmap & mask) {
7282 					if (counter < adev->gfx.config.max_cu_per_sh)
7283 						ao_bitmap |= mask;
7284 					counter++;
7285 				}
7286 				mask <<= 1;
7287 			}
7288 			active_cu_number += counter;
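			/* the 32-bit ao_cu_mask only has room for SE0/SE1 and
			 * SH0/SH1, 8 bits per SE/SH pair
			 */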
7289 			if (i < 2 && j < 2)
7290 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7291 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7292 		}
7293 	}
7294 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7295 	mutex_unlock(&adev->grbm_idx_mutex);
7296 
7297 	cu_info->number = active_cu_number;
7298 	cu_info->ao_cu_mask = ao_cu_mask;
7299 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7300 
7301 	return 0;
7302 }
7303 
7304 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7305 {
7306 	.type = AMD_IP_BLOCK_TYPE_GFX,
7307 	.major = 9,
7308 	.minor = 0,
7309 	.rev = 0,
7310 	.funcs = &gfx_v9_0_ip_funcs,
7311 };
7312