xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 817396dc9f6ab2481b94071de2e586aae876e89c)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #include "sdma0/sdma0_4_2_offset.h"
52 #include "sdma1/sdma1_4_2_offset.h"
53 #include "sdma2/sdma2_4_2_2_offset.h"
54 #include "sdma3/sdma3_4_2_2_offset.h"
55 #include "sdma4/sdma4_4_2_2_offset.h"
56 #include "sdma5/sdma5_4_2_2_offset.h"
57 #include "sdma6/sdma6_4_2_2_offset.h"
58 #include "sdma7/sdma7_4_2_2_offset.h"
59 
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
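/*
 * Locally defined PWR_MISC_CNTL_STATUS register offset and its RLC CGPG
 * enable / GFXOFF status fields.
 */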
65 #define mmPWR_MISC_CNTL_STATUS					0x0183
66 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
67 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
68 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
69 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
70 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
71 
72 #define mmGCEA_PROBE_MAP                        0x070c
73 #define mmGCEA_PROBE_MAP_BASE_IDX               0
74 
75 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
90 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
92 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
93 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
97 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/raven_me.bin");
99 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
100 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
102 
103 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
104 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
106 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
107 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
108 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
109 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
110 
111 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
112 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
114 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
115 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
116 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
117 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
120 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
121 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
122 
123 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
124 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
126 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
127 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
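/*
 * GFX sub-block indices used when talking to the RAS TA for error injection;
 * the *_INDEX_START/_INDEX_END entries bracket each hardware sub-block range.
 */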
143 enum ta_ras_gfx_subblock {
144 	/*CPC*/
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
146 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
147 	TA_RAS_BLOCK__GFX_CPC_UCODE,
148 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
149 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
150 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
151 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
152 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
153 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
155 	/* CPF*/
156 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
157 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
158 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
159 	TA_RAS_BLOCK__GFX_CPF_TAG,
160 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
161 	/* CPG*/
162 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
164 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
165 	TA_RAS_BLOCK__GFX_CPG_TAG,
166 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
167 	/* GDS*/
168 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
169 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
170 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
171 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
172 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
173 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
174 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
175 	/* SPI*/
176 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
177 	/* SQ*/
178 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
180 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
181 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
182 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
184 	/* SQC (3 ranges)*/
185 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
186 	/* SQC range 0*/
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
189 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
190 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
191 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
192 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
193 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
194 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
197 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
198 	/* SQC range 1*/
199 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
200 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
201 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
202 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
203 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
204 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
205 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
206 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
207 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
209 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
211 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
212 	/* SQC range 2*/
213 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
214 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
215 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
216 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
218 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
220 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
221 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
223 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
225 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
227 	/* TA*/
228 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
231 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
232 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
233 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
234 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
235 	/* TCA*/
236 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
239 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
240 	/* TCC (5 sub-ranges)*/
241 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
242 	/* TCC range 0*/
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
244 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
245 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
246 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
247 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
248 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
250 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
251 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
253 	/* TCC range 1*/
254 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
255 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
256 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
258 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
259 	/* TCC range 2*/
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
261 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
262 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
263 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
264 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
265 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
266 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
267 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
268 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
270 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
271 	/* TCC range 3*/
272 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
273 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
274 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
276 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
277 	/* TCC range 4*/
278 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
279 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
280 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
281 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
282 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
283 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
285 	/* TCI*/
286 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
287 	/* TCP*/
288 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
289 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
291 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
292 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
293 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
294 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
295 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
296 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
297 	/* TD*/
298 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
300 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
301 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
302 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
303 	/* EA (3 sub-ranges)*/
304 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
305 	/* EA range 0*/
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
307 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
308 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
309 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
310 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
311 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
312 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
316 	/* EA range 1*/
317 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
318 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
319 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
320 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
321 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
324 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
326 	/* EA range 2*/
327 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
328 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
329 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
330 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
331 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
333 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
334 	/* UTC VM L2 bank*/
335 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
336 	/* UTC VM walker*/
337 	TA_RAS_BLOCK__UTC_VML2_WALKER,
338 	/* UTC ATC L2 2MB cache*/
339 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
340 	/* UTC ATC L2 4KB cache*/
341 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
342 	TA_RAS_BLOCK__GFX_MAX
343 };
344 
345 struct ras_gfx_subblock {
346 	unsigned char *name;
347 	int ta_subblock;
348 	int hw_supported_error_type;
349 	int sw_supported_error_type;
350 };
351 
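/*
 * Build one ras_gfx_subblocks[] entry: arguments a..d are packed into
 * hw_supported_error_type (bits 0..3) and e..h into sw_supported_error_type
 * (g -> bit 0, e -> bit 1, h -> bit 2, f -> bit 3).
 */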
352 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
353 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
354 		#subblock,                                                     \
355 		TA_RAS_BLOCK__##subblock,                                      \
356 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
357 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
358 	}
359 
360 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
378 			     0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
380 			     0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
389 			     0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391 			     0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
393 			     0, 0),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
395 			     0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
397 			     0, 0),
398 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
399 			     0),
400 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
401 			     1),
402 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
403 			     0, 0, 0),
404 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405 			     0),
406 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
407 			     0),
408 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
409 			     0),
410 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
411 			     0),
412 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
413 			     0),
414 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
415 			     0, 0),
416 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417 			     0),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
419 			     0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
421 			     0, 0, 0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 			     0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
425 			     0),
426 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
427 			     0),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
429 			     0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
431 			     0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
433 			     0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
435 			     0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
445 			     1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
447 			     1),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
449 			     1),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
451 			     0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
453 			     0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
466 			     0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
469 			     0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
471 			     0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
473 			     0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
508 };
509 
510 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
511 {
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
532 };
533 
534 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
535 {
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
554 };
555 
556 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
557 {
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
569 };
570 
571 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
572 {
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
597 };
598 
599 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
600 {
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
608 };
609 
610 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
611 {
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
631 };
632 
633 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
634 {
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
654 };
655 
656 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
657 {
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
674 };
675 
676 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
677 {
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
691 };
692 
693 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
694 {
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
705 };
706 
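/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR_n and _DATA_n registers relative to
 * instance 0 of each.
 */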
707 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
708 {
709 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
711 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
712 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
713 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
714 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
715 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
716 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
717 };
718 
719 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
720 {
721 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
723 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
724 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
725 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
726 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
727 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
728 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
729 };
730 
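/* Golden GB_ADDR_CONFIG values used per ASIC. */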
731 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
732 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
733 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
734 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
735 
736 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
737 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
738 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
739 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
740 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
741                                  struct amdgpu_cu_info *cu_info);
742 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
743 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
744 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
745 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
746 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
747 					  void *ras_error_status);
748 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
749 				     void *inject_if);
750 
751 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
752 {
753 	switch (adev->asic_type) {
754 	case CHIP_VEGA10:
755 		soc15_program_register_sequence(adev,
756 						golden_settings_gc_9_0,
757 						ARRAY_SIZE(golden_settings_gc_9_0));
758 		soc15_program_register_sequence(adev,
759 						golden_settings_gc_9_0_vg10,
760 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
761 		break;
762 	case CHIP_VEGA12:
763 		soc15_program_register_sequence(adev,
764 						golden_settings_gc_9_2_1,
765 						ARRAY_SIZE(golden_settings_gc_9_2_1));
766 		soc15_program_register_sequence(adev,
767 						golden_settings_gc_9_2_1_vg12,
768 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
769 		break;
770 	case CHIP_VEGA20:
771 		soc15_program_register_sequence(adev,
772 						golden_settings_gc_9_0,
773 						ARRAY_SIZE(golden_settings_gc_9_0));
774 		soc15_program_register_sequence(adev,
775 						golden_settings_gc_9_0_vg20,
776 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
777 		break;
778 	case CHIP_ARCTURUS:
779 		soc15_program_register_sequence(adev,
780 						golden_settings_gc_9_4_1_arct,
781 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
782 		break;
783 	case CHIP_RAVEN:
784 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
785 						ARRAY_SIZE(golden_settings_gc_9_1));
786 		if (adev->rev_id >= 8)
787 			soc15_program_register_sequence(adev,
788 							golden_settings_gc_9_1_rv2,
789 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
790 		else
791 			soc15_program_register_sequence(adev,
792 							golden_settings_gc_9_1_rv1,
793 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
794 		break;
795 	case CHIP_RENOIR:
796 		soc15_program_register_sequence(adev,
797 						golden_settings_gc_9_1_rn,
798 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
799 		return; /* Renoir does not need the common golden settings */
800 	default:
801 		break;
802 	}
803 
804 	if (adev->asic_type != CHIP_ARCTURUS)
805 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
806 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
807 }
808 
809 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
810 {
811 	adev->gfx.scratch.num_reg = 8;
812 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
813 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
814 }
815 
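/*
 * Emit a PACKET3_WRITE_DATA packet on @ring that writes @val to the register
 * at offset @reg via engine @eng_sel, optionally requesting write confirmation
 * when @wc is set.
 */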
816 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
817 				       bool wc, uint32_t reg, uint32_t val)
818 {
819 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
820 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
821 				WRITE_DATA_DST_SEL(0) |
822 				(wc ? WR_CONFIRM : 0));
823 	amdgpu_ring_write(ring, reg);
824 	amdgpu_ring_write(ring, 0);
825 	amdgpu_ring_write(ring, val);
826 }
827 
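/*
 * Emit a PACKET3_WAIT_REG_MEM packet that polls a register pair or a memory
 * location (selected by @mem_space) until (value & @mask) == @ref, using @inv
 * as the poll interval.
 */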
828 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
829 				  int mem_space, int opt, uint32_t addr0,
830 				  uint32_t addr1, uint32_t ref, uint32_t mask,
831 				  uint32_t inv)
832 {
833 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
834 	amdgpu_ring_write(ring,
835 				 /* memory (1) or register (0) */
836 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
837 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
838 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
839 				 WAIT_REG_MEM_ENGINE(eng_sel)));
840 
841 	if (mem_space)
842 		BUG_ON(addr0 & 0x3); /* Dword align */
843 	amdgpu_ring_write(ring, addr0);
844 	amdgpu_ring_write(ring, addr1);
845 	amdgpu_ring_write(ring, ref);
846 	amdgpu_ring_write(ring, mask);
847 	amdgpu_ring_write(ring, inv); /* poll interval */
848 }
849 
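/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a packet
 * that writes 0xDEADBEEF to it and poll until the new value appears or the
 * timeout expires.
 */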
850 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
851 {
852 	struct amdgpu_device *adev = ring->adev;
853 	uint32_t scratch;
854 	uint32_t tmp = 0;
855 	unsigned i;
856 	int r;
857 
858 	r = amdgpu_gfx_scratch_get(adev, &scratch);
859 	if (r)
860 		return r;
861 
862 	WREG32(scratch, 0xCAFEDEAD);
863 	r = amdgpu_ring_alloc(ring, 3);
864 	if (r)
865 		goto error_free_scratch;
866 
867 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
868 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
869 	amdgpu_ring_write(ring, 0xDEADBEEF);
870 	amdgpu_ring_commit(ring);
871 
872 	for (i = 0; i < adev->usec_timeout; i++) {
873 		tmp = RREG32(scratch);
874 		if (tmp == 0xDEADBEEF)
875 			break;
876 		udelay(1);
877 	}
878 
879 	if (i >= adev->usec_timeout)
880 		r = -ETIMEDOUT;
881 
882 error_free_scratch:
883 	amdgpu_gfx_scratch_free(adev, scratch);
884 	return r;
885 }
886 
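/*
 * Indirect buffer test: submit an IB containing a WRITE_DATA packet that
 * stores 0xDEADBEEF to a writeback slot, wait for its fence and verify the
 * value landed in memory.
 */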
887 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
888 {
889 	struct amdgpu_device *adev = ring->adev;
890 	struct amdgpu_ib ib;
891 	struct dma_fence *f = NULL;
892 
893 	unsigned index;
894 	uint64_t gpu_addr;
895 	uint32_t tmp;
896 	long r;
897 
898 	r = amdgpu_device_wb_get(adev, &index);
899 	if (r)
900 		return r;
901 
902 	gpu_addr = adev->wb.gpu_addr + (index * 4);
903 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
904 	memset(&ib, 0, sizeof(ib));
905 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
906 	if (r)
907 		goto err1;
908 
909 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
910 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
911 	ib.ptr[2] = lower_32_bits(gpu_addr);
912 	ib.ptr[3] = upper_32_bits(gpu_addr);
913 	ib.ptr[4] = 0xDEADBEEF;
914 	ib.length_dw = 5;
915 
916 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
917 	if (r)
918 		goto err2;
919 
920 	r = dma_fence_wait_timeout(f, false, timeout);
921 	if (r == 0) {
922 		r = -ETIMEDOUT;
923 		goto err2;
924 	} else if (r < 0) {
925 		goto err2;
926 	}
927 
928 	tmp = adev->wb.wb[index];
929 	if (tmp == 0xDEADBEEF)
930 		r = 0;
931 	else
932 		r = -EINVAL;
933 
934 err2:
935 	amdgpu_ib_free(adev, &ib, NULL);
936 	dma_fence_put(f);
937 err1:
938 	amdgpu_device_wb_free(adev, index);
939 	return r;
940 }
941 
942 
943 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
944 {
945 	release_firmware(adev->gfx.pfp_fw);
946 	adev->gfx.pfp_fw = NULL;
947 	release_firmware(adev->gfx.me_fw);
948 	adev->gfx.me_fw = NULL;
949 	release_firmware(adev->gfx.ce_fw);
950 	adev->gfx.ce_fw = NULL;
951 	release_firmware(adev->gfx.rlc_fw);
952 	adev->gfx.rlc_fw = NULL;
953 	release_firmware(adev->gfx.mec_fw);
954 	adev->gfx.mec_fw = NULL;
955 	release_firmware(adev->gfx.mec2_fw);
956 	adev->gfx.mec2_fw = NULL;
957 
958 	kfree(adev->gfx.rlc.register_list_format);
959 }
960 
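/*
 * Parse the v2.1 extensions of the RLC firmware header: versions, sizes and
 * pointers of the save/restore list CNTL, GPM and SRM blobs.
 */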
961 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
962 {
963 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
964 
965 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
966 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
967 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
968 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
969 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
970 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
971 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
972 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
973 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
974 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
975 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
976 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
977 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
978 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
979 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
980 }
981 
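/*
 * Record whether the ME and MEC firmware are new enough to support combined
 * register write-then-wait handling, and warn once if the CP firmware is too
 * old.
 */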
982 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
983 {
984 	adev->gfx.me_fw_write_wait = false;
985 	adev->gfx.mec_fw_write_wait = false;
986 
987 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
988 	    (adev->gfx.mec_feature_version < 46) ||
989 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
990 	    (adev->gfx.pfp_feature_version < 46))
991 		DRM_WARN_ONCE("CP firmware version too old, please update!");
992 
993 	switch (adev->asic_type) {
994 	case CHIP_VEGA10:
995 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
996 		    (adev->gfx.me_feature_version >= 42) &&
997 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
998 		    (adev->gfx.pfp_feature_version >= 42))
999 			adev->gfx.me_fw_write_wait = true;
1000 
1001 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1002 		    (adev->gfx.mec_feature_version >= 42))
1003 			adev->gfx.mec_fw_write_wait = true;
1004 		break;
1005 	case CHIP_VEGA12:
1006 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1007 		    (adev->gfx.me_feature_version >= 44) &&
1008 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1009 		    (adev->gfx.pfp_feature_version >= 44))
1010 			adev->gfx.me_fw_write_wait = true;
1011 
1012 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1013 		    (adev->gfx.mec_feature_version >= 44))
1014 			adev->gfx.mec_fw_write_wait = true;
1015 		break;
1016 	case CHIP_VEGA20:
1017 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1018 		    (adev->gfx.me_feature_version >= 44) &&
1019 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1020 		    (adev->gfx.pfp_feature_version >= 44))
1021 			adev->gfx.me_fw_write_wait = true;
1022 
1023 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1024 		    (adev->gfx.mec_feature_version >= 44))
1025 			adev->gfx.mec_fw_write_wait = true;
1026 		break;
1027 	case CHIP_RAVEN:
1028 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1029 		    (adev->gfx.me_feature_version >= 42) &&
1030 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1031 		    (adev->gfx.pfp_feature_version >= 42))
1032 			adev->gfx.me_fw_write_wait = true;
1033 
1034 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1035 		    (adev->gfx.mec_feature_version >= 42))
1036 			adev->gfx.mec_fw_write_wait = true;
1037 		break;
1038 	default:
1039 		break;
1040 	}
1041 }
1042 
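/*
 * Disable GFXOFF on original Raven parts whose SMC firmware is too old or
 * which lack the RLC save/restore ucode; when GFXOFF remains enabled, set the
 * GFX powergating flags it depends on.
 */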
1043 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1044 {
1045 	switch (adev->asic_type) {
1046 	case CHIP_VEGA10:
1047 	case CHIP_VEGA12:
1048 	case CHIP_VEGA20:
1049 		break;
1050 	case CHIP_RAVEN:
1051 		if (!(adev->rev_id >= 0x8 ||
1052 		      adev->pdev->device == 0x15d8) &&
1053 		    (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
1054 		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1055 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1056 
1057 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1058 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1059 				AMD_PG_SUPPORT_CP |
1060 				AMD_PG_SUPPORT_RLC_SMU_HS;
1061 		break;
1062 	case CHIP_RENOIR:
1063 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1064 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1065 				AMD_PG_SUPPORT_CP |
1066 				AMD_PG_SUPPORT_RLC_SMU_HS;
1067 		break;
1068 	default:
1069 		break;
1070 	}
1071 }
1072 
1073 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1074 					  const char *chip_name)
1075 {
1076 	char fw_name[30];
1077 	int err;
1078 	struct amdgpu_firmware_info *info = NULL;
1079 	const struct common_firmware_header *header = NULL;
1080 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1081 
1082 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1083 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1084 	if (err)
1085 		goto out;
1086 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1087 	if (err)
1088 		goto out;
1089 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1090 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1091 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1092 
1093 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1094 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1095 	if (err)
1096 		goto out;
1097 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1098 	if (err)
1099 		goto out;
1100 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1101 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1102 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1103 
1104 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1105 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1106 	if (err)
1107 		goto out;
1108 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1109 	if (err)
1110 		goto out;
1111 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1112 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1113 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1114 
1115 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1116 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1117 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1118 		info->fw = adev->gfx.pfp_fw;
1119 		header = (const struct common_firmware_header *)info->fw->data;
1120 		adev->firmware.fw_size +=
1121 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1122 
1123 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1124 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1125 		info->fw = adev->gfx.me_fw;
1126 		header = (const struct common_firmware_header *)info->fw->data;
1127 		adev->firmware.fw_size +=
1128 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1129 
1130 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1131 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1132 		info->fw = adev->gfx.ce_fw;
1133 		header = (const struct common_firmware_header *)info->fw->data;
1134 		adev->firmware.fw_size +=
1135 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1136 	}
1137 
1138 out:
1139 	if (err) {
1140 		dev_err(adev->dev,
1141 			"gfx9: Failed to load firmware \"%s\"\n",
1142 			fw_name);
1143 		release_firmware(adev->gfx.pfp_fw);
1144 		adev->gfx.pfp_fw = NULL;
1145 		release_firmware(adev->gfx.me_fw);
1146 		adev->gfx.me_fw = NULL;
1147 		release_firmware(adev->gfx.ce_fw);
1148 		adev->gfx.ce_fw = NULL;
1149 	}
1150 	return err;
1151 }
1152 
1153 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1154 					  const char *chip_name)
1155 {
1156 	char fw_name[30];
1157 	int err;
1158 	struct amdgpu_firmware_info *info = NULL;
1159 	const struct common_firmware_header *header = NULL;
1160 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1161 	unsigned int *tmp = NULL;
1162 	unsigned int i = 0;
1163 	uint16_t version_major;
1164 	uint16_t version_minor;
1165 	uint32_t smu_version;
1166 
1167 	/*
1168 	 * For Picasso on AM4 socket boards, use picasso_rlc_am4.bin
1169 	 * instead of picasso_rlc.bin.
1170 	 * Detection:
1171 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1172 	 *          or revision >= 0xD8 && revision <= 0xDF
1173 	 * otherwise the part is PCO FP5.
1174 	 */
1175 	if (!strcmp(chip_name, "picasso") &&
1176 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1177 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1178 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1179 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1180 		(smu_version >= 0x41e2b))
1181 		/*
1182 		 * SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
1183 		 */
1184 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1185 	else
1186 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1187 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1188 	if (err)
1189 		goto out;
1190 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1191 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1192 
1193 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1194 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1195 	if (version_major == 2 && version_minor == 1)
1196 		adev->gfx.rlc.is_rlc_v2_1 = true;
1197 
1198 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1199 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1200 	adev->gfx.rlc.save_and_restore_offset =
1201 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1202 	adev->gfx.rlc.clear_state_descriptor_offset =
1203 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1204 	adev->gfx.rlc.avail_scratch_ram_locations =
1205 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1206 	adev->gfx.rlc.reg_restore_list_size =
1207 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1208 	adev->gfx.rlc.reg_list_format_start =
1209 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1210 	adev->gfx.rlc.reg_list_format_separate_start =
1211 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1212 	adev->gfx.rlc.starting_offsets_start =
1213 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1214 	adev->gfx.rlc.reg_list_format_size_bytes =
1215 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1216 	adev->gfx.rlc.reg_list_size_bytes =
1217 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1218 	adev->gfx.rlc.register_list_format =
1219 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1220 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1221 	if (!adev->gfx.rlc.register_list_format) {
1222 		err = -ENOMEM;
1223 		goto out;
1224 	}
1225 
1226 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1227 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1228 	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1229 		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1230 
1231 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1232 
1233 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1234 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1235 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1236 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1237 
1238 	if (adev->gfx.rlc.is_rlc_v2_1)
1239 		gfx_v9_0_init_rlc_ext_microcode(adev);
1240 
1241 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1242 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1243 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1244 		info->fw = adev->gfx.rlc_fw;
1245 		header = (const struct common_firmware_header *)info->fw->data;
1246 		adev->firmware.fw_size +=
1247 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1248 
1249 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1250 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1251 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1252 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1253 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1254 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1255 			info->fw = adev->gfx.rlc_fw;
1256 			adev->firmware.fw_size +=
1257 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1258 
1259 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1260 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1261 			info->fw = adev->gfx.rlc_fw;
1262 			adev->firmware.fw_size +=
1263 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1264 
1265 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1266 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1267 			info->fw = adev->gfx.rlc_fw;
1268 			adev->firmware.fw_size +=
1269 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1270 		}
1271 	}
1272 
1273 out:
1274 	if (err) {
1275 		dev_err(adev->dev,
1276 			"gfx9: Failed to load firmware \"%s\"\n",
1277 			fw_name);
1278 		release_firmware(adev->gfx.rlc_fw);
1279 		adev->gfx.rlc_fw = NULL;
1280 	}
1281 	return err;
1282 }
1283 
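/*
 * Fetch the compute (MEC) firmware. MEC2 firmware is optional; if it is
 * missing the driver silently falls back to MEC1 only. With PSP loading the
 * MEC/MEC2 ucode and their jump tables are registered as separate entries.
 */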
1284 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1285 					  const char *chip_name)
1286 {
1287 	char fw_name[30];
1288 	int err;
1289 	struct amdgpu_firmware_info *info = NULL;
1290 	const struct common_firmware_header *header = NULL;
1291 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1292 
1293 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1294 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1295 	if (err)
1296 		goto out;
1297 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1298 	if (err)
1299 		goto out;
1300 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1301 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1302 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1303 
1304 
1305 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1306 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1307 	if (!err) {
1308 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1309 		if (err)
1310 			goto out;
1311 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1312 			adev->gfx.mec2_fw->data;
1313 		adev->gfx.mec2_fw_version =
1314 			le32_to_cpu(cp_hdr->header.ucode_version);
1315 		adev->gfx.mec2_feature_version =
1316 			le32_to_cpu(cp_hdr->ucode_feature_version);
1317 	} else {
1318 		err = 0;
1319 		adev->gfx.mec2_fw = NULL;
1320 	}
1321 
1322 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1323 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1324 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1325 		info->fw = adev->gfx.mec_fw;
1326 		header = (const struct common_firmware_header *)info->fw->data;
1327 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1328 		adev->firmware.fw_size +=
1329 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1330 
1331 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1332 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1333 		info->fw = adev->gfx.mec_fw;
1334 		adev->firmware.fw_size +=
1335 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1336 
1337 		if (adev->gfx.mec2_fw) {
1338 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1339 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1340 			info->fw = adev->gfx.mec2_fw;
1341 			header = (const struct common_firmware_header *)info->fw->data;
1342 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1343 			adev->firmware.fw_size +=
1344 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1345 
1346 			/* TODO: Determine if MEC2 JT FW loading can be removed
1347 			 * for all GFX v9 ASICs and above */
1348 			if (adev->asic_type != CHIP_ARCTURUS &&
1349 			    adev->asic_type != CHIP_RENOIR) {
1350 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1351 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1352 				info->fw = adev->gfx.mec2_fw;
1353 				adev->firmware.fw_size +=
1354 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1355 					PAGE_SIZE);
1356 			}
1357 		}
1358 	}
1359 
1360 out:
1361 	gfx_v9_0_check_if_need_gfxoff(adev);
1362 	gfx_v9_0_check_fw_write_wait(adev);
1363 	if (err) {
1364 		dev_err(adev->dev,
1365 			"gfx9: Failed to load firmware \"%s\"\n",
1366 			fw_name);
1367 		release_firmware(adev->gfx.mec_fw);
1368 		adev->gfx.mec_fw = NULL;
1369 		release_firmware(adev->gfx.mec2_fw);
1370 		adev->gfx.mec2_fw = NULL;
1371 	}
1372 	return err;
1373 }
1374 
1375 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1376 {
1377 	const char *chip_name;
1378 	int r;
1379 
1380 	DRM_DEBUG("\n");
1381 
1382 	switch (adev->asic_type) {
1383 	case CHIP_VEGA10:
1384 		chip_name = "vega10";
1385 		break;
1386 	case CHIP_VEGA12:
1387 		chip_name = "vega12";
1388 		break;
1389 	case CHIP_VEGA20:
1390 		chip_name = "vega20";
1391 		break;
1392 	case CHIP_RAVEN:
1393 		if (adev->rev_id >= 8)
1394 			chip_name = "raven2";
1395 		else if (adev->pdev->device == 0x15d8)
1396 			chip_name = "picasso";
1397 		else
1398 			chip_name = "raven";
1399 		break;
1400 	case CHIP_ARCTURUS:
1401 		chip_name = "arcturus";
1402 		break;
1403 	case CHIP_RENOIR:
1404 		chip_name = "renoir";
1405 		break;
1406 	default:
1407 		BUG();
1408 	}
1409 
1410 	/* No CPG in Arcturus */
1411 	if (adev->asic_type != CHIP_ARCTURUS) {
1412 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1413 		if (r)
1414 			return r;
1415 	}
1416 
1417 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1418 	if (r)
1419 		return r;
1420 
1421 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1422 	if (r)
1423 		return r;
1424 
1425 	return 0;
1426 }
1427 
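/*
 * Clear-state buffer size in dwords: preamble begin (2) + context control (3)
 * + one SET_CONTEXT_REG packet (2 + reg_count dwords) per context extent +
 * preamble end (2) + clear state (2).
 */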
1428 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1429 {
1430 	u32 count = 0;
1431 	const struct cs_section_def *sect = NULL;
1432 	const struct cs_extent_def *ext = NULL;
1433 
1434 	/* begin clear state */
1435 	count += 2;
1436 	/* context control state */
1437 	count += 3;
1438 
1439 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1440 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1441 			if (sect->id == SECT_CONTEXT)
1442 				count += 2 + ext->reg_count;
1443 			else
1444 				return 0;
1445 		}
1446 	}
1447 
1448 	/* end clear state */
1449 	count += 2;
1450 	/* clear state */
1451 	count += 2;
1452 
1453 	return count;
1454 }
1455 
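/*
 * Emit the PM4 clear-state stream sized by gfx_v9_0_get_csb_size() into the
 * CSB bo: PREAMBLE begin, CONTEXT_CONTROL, the SET_CONTEXT_REG extents,
 * PREAMBLE end and a final CLEAR_STATE packet.
 */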
1456 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1457 				    volatile u32 *buffer)
1458 {
1459 	u32 count = 0, i;
1460 	const struct cs_section_def *sect = NULL;
1461 	const struct cs_extent_def *ext = NULL;
1462 
1463 	if (adev->gfx.rlc.cs_data == NULL)
1464 		return;
1465 	if (buffer == NULL)
1466 		return;
1467 
1468 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1469 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1470 
1471 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1472 	buffer[count++] = cpu_to_le32(0x80000000);
1473 	buffer[count++] = cpu_to_le32(0x80000000);
1474 
1475 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1476 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1477 			if (sect->id == SECT_CONTEXT) {
1478 				buffer[count++] =
1479 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1480 				buffer[count++] = cpu_to_le32(ext->reg_index -
1481 						PACKET3_SET_CONTEXT_REG_START);
1482 				for (i = 0; i < ext->reg_count; i++)
1483 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1484 			} else {
1485 				return;
1486 			}
1487 		}
1488 	}
1489 
1490 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1491 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1492 
1493 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1494 	buffer[count++] = cpu_to_le32(0);
1495 }
1496 
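/*
 * Mark the first N CUs of each SE/SH as always-on so they are excluded from
 * power gating: 2 CUs go into RLC_PG_ALWAYS_ON_CU_MASK and 4/8/12 CUs
 * (APU/Vega12/others) into RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 */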
1497 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1498 {
1499 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1500 	uint32_t pg_always_on_cu_num = 2;
1501 	uint32_t always_on_cu_num;
1502 	uint32_t i, j, k;
1503 	uint32_t mask, cu_bitmap, counter;
1504 
1505 	if (adev->flags & AMD_IS_APU)
1506 		always_on_cu_num = 4;
1507 	else if (adev->asic_type == CHIP_VEGA12)
1508 		always_on_cu_num = 8;
1509 	else
1510 		always_on_cu_num = 12;
1511 
1512 	mutex_lock(&adev->grbm_idx_mutex);
1513 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1514 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1515 			mask = 1;
1516 			cu_bitmap = 0;
1517 			counter = 0;
1518 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1519 
1520 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1521 				if (cu_info->bitmap[i][j] & mask) {
1522 					if (counter == pg_always_on_cu_num)
1523 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1524 					if (counter < always_on_cu_num)
1525 						cu_bitmap |= mask;
1526 					else
1527 						break;
1528 					counter++;
1529 				}
1530 				mask <<= 1;
1531 			}
1532 
1533 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1534 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1535 		}
1536 	}
1537 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1538 	mutex_unlock(&adev->grbm_idx_mutex);
1539 }
1540 
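/*
 * Program the RLC LBPW (load balancing per watt) thresholds and counters for
 * Raven; gfx_v9_4_init_lbpw() below is the Vega20 variant with different
 * threshold values.
 */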
1541 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1542 {
1543 	uint32_t data;
1544 
1545 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1546 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1547 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1548 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1549 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1550 
1551 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1552 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1553 
1554 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1555 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1556 
1557 	mutex_lock(&adev->grbm_idx_mutex);
1558 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1559 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1560 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1561 
1562 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1563 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1564 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1565 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1566 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1567 
1568 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1569 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1570 	data &= 0x0000FFFF;
1571 	data |= 0x00C00000;
1572 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1573 
1574 	/*
1575 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1576 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1577 	 */
1578 
1579 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1580 	 * but is used here as part of the RLC_LB_CNTL configuration */
1581 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1582 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1583 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1584 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1585 	mutex_unlock(&adev->grbm_idx_mutex);
1586 
1587 	gfx_v9_0_init_always_on_cu_mask(adev);
1588 }
1589 
1590 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1591 {
1592 	uint32_t data;
1593 
1594 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1595 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1596 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1597 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1598 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1599 
1600 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1601 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1602 
1603 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1604 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1605 
1606 	mutex_lock(&adev->grbm_idx_mutex);
1607 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1608 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1609 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1610 
1611 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1612 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1613 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1614 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1615 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1616 
1617 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1618 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1619 	data &= 0x0000FFFF;
1620 	data |= 0x00C00000;
1621 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1622 
1623 	/*
1624 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1625 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1626 	 */
1627 
1628 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1629 	 * but is used here as part of the RLC_LB_CNTL configuration */
1630 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1631 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1632 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1633 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1634 	mutex_unlock(&adev->grbm_idx_mutex);
1635 
1636 	gfx_v9_0_init_always_on_cu_mask(adev);
1637 }
1638 
1639 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1640 {
1641 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1642 }
1643 
1644 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1645 {
1646 	return 5;
1647 }
1648 
1649 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1650 {
1651 	const struct cs_section_def *cs_data;
1652 	int r;
1653 
1654 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1655 
1656 	cs_data = adev->gfx.rlc.cs_data;
1657 
1658 	if (cs_data) {
1659 		/* init clear state block */
1660 		r = amdgpu_gfx_rlc_init_csb(adev);
1661 		if (r)
1662 			return r;
1663 	}
1664 
1665 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1666 		/* TODO: double check the cp_table_size for RV */
1667 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1668 		r = amdgpu_gfx_rlc_init_cpt(adev);
1669 		if (r)
1670 			return r;
1671 	}
1672 
1673 	switch (adev->asic_type) {
1674 	case CHIP_RAVEN:
1675 		gfx_v9_0_init_lbpw(adev);
1676 		break;
1677 	case CHIP_VEGA20:
1678 		gfx_v9_4_init_lbpw(adev);
1679 		break;
1680 	default:
1681 		break;
1682 	}
1683 
1684 	return 0;
1685 }
1686 
1687 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1688 {
1689 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1690 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1691 }
1692 
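/*
 * Allocate the MEC resources: take ownership of the compute queues, create
 * the HPD/EOP buffer in VRAM (GFX9_MEC_HPD_SIZE bytes per compute ring) and
 * a GTT bo holding a copy of the MEC ucode.
 */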
1693 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1694 {
1695 	int r;
1696 	u32 *hpd;
1697 	const __le32 *fw_data;
1698 	unsigned fw_size;
1699 	u32 *fw;
1700 	size_t mec_hpd_size;
1701 
1702 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1703 
1704 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1705 
1706 	/* take ownership of the relevant compute queues */
1707 	amdgpu_gfx_compute_queue_acquire(adev);
1708 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1709 
1710 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1711 				      AMDGPU_GEM_DOMAIN_VRAM,
1712 				      &adev->gfx.mec.hpd_eop_obj,
1713 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1714 				      (void **)&hpd);
1715 	if (r) {
1716 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1717 		gfx_v9_0_mec_fini(adev);
1718 		return r;
1719 	}
1720 
1721 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1722 
1723 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1724 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1725 
1726 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1727 
1728 	fw_data = (const __le32 *)
1729 		(adev->gfx.mec_fw->data +
1730 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1731 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1732 
1733 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1734 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1735 				      &adev->gfx.mec.mec_fw_obj,
1736 				      &adev->gfx.mec.mec_fw_gpu_addr,
1737 				      (void **)&fw);
1738 	if (r) {
1739 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1740 		gfx_v9_0_mec_fini(adev);
1741 		return r;
1742 	}
1743 
1744 	memcpy(fw, fw_data, fw_size);
1745 
1746 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1747 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1748 
1749 	return 0;
1750 }
1751 
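/*
 * Wave debug helpers: read SQ per-wave state through the SQ_IND_INDEX /
 * SQ_IND_DATA indirect register pair (optionally auto-incrementing for bulk
 * SGPR/VGPR reads).
 */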
1752 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1753 {
1754 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1755 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1756 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1757 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1758 		(SQ_IND_INDEX__FORCE_READ_MASK));
1759 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1760 }
1761 
1762 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1763 			   uint32_t wave, uint32_t thread,
1764 			   uint32_t regno, uint32_t num, uint32_t *out)
1765 {
1766 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1767 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1768 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1769 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1770 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1771 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1772 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1773 	while (num--)
1774 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1775 }
1776 
1777 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1778 {
1779 	/* type 1 wave data */
1780 	dst[(*no_fields)++] = 1;
1781 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1782 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1783 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1784 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1790 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1795 }
1796 
1797 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1798 				     uint32_t wave, uint32_t start,
1799 				     uint32_t size, uint32_t *dst)
1800 {
1801 	wave_read_regs(
1802 		adev, simd, wave, 0,
1803 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1804 }
1805 
1806 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1807 				     uint32_t wave, uint32_t thread,
1808 				     uint32_t start, uint32_t size,
1809 				     uint32_t *dst)
1810 {
1811 	wave_read_regs(
1812 		adev, simd, wave, thread,
1813 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1814 }
1815 
1816 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1817 				  u32 me, u32 pipe, u32 q, u32 vm)
1818 {
1819 	soc15_grbm_select(adev, me, pipe, q, vm);
1820 }
1821 
1822 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1823 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1824 	.select_se_sh = &gfx_v9_0_select_se_sh,
1825 	.read_wave_data = &gfx_v9_0_read_wave_data,
1826 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1827 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1828 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1829 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1830 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1831 };
1832 
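/*
 * Per-ASIC gfx configuration: fixed FIFO sizes, the GB_ADDR_CONFIG golden
 * value (or the value read back from hardware), and the decoded
 * gb_addr_config_fields (pipes, banks, RBs, SEs, pipe interleave size).
 */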
1833 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1834 {
1835 	u32 gb_addr_config;
1836 	int err;
1837 
1838 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1839 
1840 	switch (adev->asic_type) {
1841 	case CHIP_VEGA10:
1842 		adev->gfx.config.max_hw_contexts = 8;
1843 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1844 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1845 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1846 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1847 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1848 		break;
1849 	case CHIP_VEGA12:
1850 		adev->gfx.config.max_hw_contexts = 8;
1851 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1852 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1853 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1854 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1855 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1856 		DRM_INFO("fix gfx.config for vega12\n");
1857 		break;
1858 	case CHIP_VEGA20:
1859 		adev->gfx.config.max_hw_contexts = 8;
1860 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1861 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1862 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1863 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1864 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1865 		gb_addr_config &= ~0xf3e777ff;
1866 		gb_addr_config |= 0x22014042;
1867 		/* check vbios table if gpu info is not available */
1868 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1869 		if (err)
1870 			return err;
1871 		break;
1872 	case CHIP_RAVEN:
1873 		adev->gfx.config.max_hw_contexts = 8;
1874 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1878 		if (adev->rev_id >= 8)
1879 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1880 		else
1881 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1882 		break;
1883 	case CHIP_ARCTURUS:
1884 		adev->gfx.config.max_hw_contexts = 8;
1885 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1886 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1887 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1888 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1889 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1890 		gb_addr_config &= ~0xf3e777ff;
1891 		gb_addr_config |= 0x22014042;
1892 		break;
1893 	case CHIP_RENOIR:
1894 		adev->gfx.config.max_hw_contexts = 8;
1895 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1896 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1897 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1898 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1899 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1900 		gb_addr_config &= ~0xf3e777ff;
1901 		gb_addr_config |= 0x22010042;
1902 		break;
1903 	default:
1904 		BUG();
1905 		break;
1906 	}
1907 
1908 	adev->gfx.config.gb_addr_config = gb_addr_config;
1909 
1910 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1911 			REG_GET_FIELD(
1912 					adev->gfx.config.gb_addr_config,
1913 					GB_ADDR_CONFIG,
1914 					NUM_PIPES);
1915 
1916 	adev->gfx.config.max_tile_pipes =
1917 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1918 
1919 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1920 			REG_GET_FIELD(
1921 					adev->gfx.config.gb_addr_config,
1922 					GB_ADDR_CONFIG,
1923 					NUM_BANKS);
1924 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1925 			REG_GET_FIELD(
1926 					adev->gfx.config.gb_addr_config,
1927 					GB_ADDR_CONFIG,
1928 					MAX_COMPRESSED_FRAGS);
1929 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1930 			REG_GET_FIELD(
1931 					adev->gfx.config.gb_addr_config,
1932 					GB_ADDR_CONFIG,
1933 					NUM_RB_PER_SE);
1934 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1935 			REG_GET_FIELD(
1936 					adev->gfx.config.gb_addr_config,
1937 					GB_ADDR_CONFIG,
1938 					NUM_SHADER_ENGINES);
1939 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1940 			REG_GET_FIELD(
1941 					adev->gfx.config.gb_addr_config,
1942 					GB_ADDR_CONFIG,
1943 					PIPE_INTERLEAVE_SIZE));
1944 
1945 	return 0;
1946 }
1947 
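/*
 * Set up one compute ring: map it to its MEC/pipe/queue, assign a doorbell
 * and an EOP slot in the HPD buffer, and attach it to the matching EOP
 * interrupt source.
 */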
1948 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1949 				      int mec, int pipe, int queue)
1950 {
1951 	int r;
1952 	unsigned irq_type;
1953 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1954 
1957 	/* mec0 is me1 */
1958 	ring->me = mec + 1;
1959 	ring->pipe = pipe;
1960 	ring->queue = queue;
1961 
1962 	ring->ring_obj = NULL;
1963 	ring->use_doorbell = true;
1964 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1965 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1966 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1967 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1968 
1969 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1970 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1971 		+ ring->pipe;
1972 
1973 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1974 	r = amdgpu_ring_init(adev, ring, 1024,
1975 			     &adev->gfx.eop_irq, irq_type);
1976 	if (r)
1977 		return r;
1978 
1979 
1980 	return 0;
1981 }
1982 
1983 static int gfx_v9_0_sw_init(void *handle)
1984 {
1985 	int i, j, k, r, ring_id;
1986 	struct amdgpu_ring *ring;
1987 	struct amdgpu_kiq *kiq;
1988 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1989 
1990 	switch (adev->asic_type) {
1991 	case CHIP_VEGA10:
1992 	case CHIP_VEGA12:
1993 	case CHIP_VEGA20:
1994 	case CHIP_RAVEN:
1995 	case CHIP_ARCTURUS:
1996 	case CHIP_RENOIR:
1997 		adev->gfx.mec.num_mec = 2;
1998 		break;
1999 	default:
2000 		adev->gfx.mec.num_mec = 1;
2001 		break;
2002 	}
2003 
2004 	adev->gfx.mec.num_pipe_per_mec = 4;
2005 	adev->gfx.mec.num_queue_per_pipe = 8;
2006 
2007 	/* EOP Event */
2008 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2009 	if (r)
2010 		return r;
2011 
2012 	/* Privileged reg */
2013 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2014 			      &adev->gfx.priv_reg_irq);
2015 	if (r)
2016 		return r;
2017 
2018 	/* Privileged inst */
2019 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2020 			      &adev->gfx.priv_inst_irq);
2021 	if (r)
2022 		return r;
2023 
2024 	/* ECC error */
2025 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2026 			      &adev->gfx.cp_ecc_error_irq);
2027 	if (r)
2028 		return r;
2029 
2030 	/* FUE error */
2031 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2032 			      &adev->gfx.cp_ecc_error_irq);
2033 	if (r)
2034 		return r;
2035 
2036 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2037 
2038 	gfx_v9_0_scratch_init(adev);
2039 
2040 	r = gfx_v9_0_init_microcode(adev);
2041 	if (r) {
2042 		DRM_ERROR("Failed to load gfx firmware!\n");
2043 		return r;
2044 	}
2045 
2046 	r = adev->gfx.rlc.funcs->init(adev);
2047 	if (r) {
2048 		DRM_ERROR("Failed to init rlc BOs!\n");
2049 		return r;
2050 	}
2051 
2052 	r = gfx_v9_0_mec_init(adev);
2053 	if (r) {
2054 		DRM_ERROR("Failed to init MEC BOs!\n");
2055 		return r;
2056 	}
2057 
2058 	/* set up the gfx ring */
2059 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2060 		ring = &adev->gfx.gfx_ring[i];
2061 		ring->ring_obj = NULL;
2062 		if (!i)
2063 			sprintf(ring->name, "gfx");
2064 		else
2065 			sprintf(ring->name, "gfx_%d", i);
2066 		ring->use_doorbell = true;
2067 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2068 		r = amdgpu_ring_init(adev, ring, 1024,
2069 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2070 		if (r)
2071 			return r;
2072 	}
2073 
2074 	/* set up the compute queues - allocate horizontally across pipes */
2075 	ring_id = 0;
2076 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2077 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2078 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2079 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2080 					continue;
2081 
2082 				r = gfx_v9_0_compute_ring_init(adev,
2083 							       ring_id,
2084 							       i, k, j);
2085 				if (r)
2086 					return r;
2087 
2088 				ring_id++;
2089 			}
2090 		}
2091 	}
2092 
2093 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2094 	if (r) {
2095 		DRM_ERROR("Failed to init KIQ BOs!\n");
2096 		return r;
2097 	}
2098 
2099 	kiq = &adev->gfx.kiq;
2100 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2101 	if (r)
2102 		return r;
2103 
2104 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2105 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2106 	if (r)
2107 		return r;
2108 
2109 	adev->gfx.ce_ram_size = 0x8000;
2110 
2111 	r = gfx_v9_0_gpu_early_init(adev);
2112 	if (r)
2113 		return r;
2114 
2115 	return 0;
2116 }
2117 
2118 
2119 static int gfx_v9_0_sw_fini(void *handle)
2120 {
2121 	int i;
2122 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2123 
2124 	amdgpu_gfx_ras_fini(adev);
2125 
2126 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2127 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2128 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2129 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2130 
2131 	amdgpu_gfx_mqd_sw_fini(adev);
2132 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2133 	amdgpu_gfx_kiq_fini(adev);
2134 
2135 	gfx_v9_0_mec_fini(adev);
2136 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2137 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2138 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2139 				&adev->gfx.rlc.cp_table_gpu_addr,
2140 				(void **)&adev->gfx.rlc.cp_table_ptr);
2141 	}
2142 	gfx_v9_0_free_microcode(adev);
2143 
2144 	return 0;
2145 }
2146 
2147 
2148 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2149 {
2150 	/* TODO */
2151 }
2152 
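/*
 * Point GRBM_GFX_INDEX at a specific SE/SH/instance, or broadcast to all of
 * them when 0xffffffff is passed. Callers are expected to hold
 * grbm_idx_mutex around this.
 */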
2153 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2154 {
2155 	u32 data;
2156 
2157 	if (instance == 0xffffffff)
2158 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2159 	else
2160 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2161 
2162 	if (se_num == 0xffffffff)
2163 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2164 	else
2165 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2166 
2167 	if (sh_num == 0xffffffff)
2168 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2169 	else
2170 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2171 
2172 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2173 }
2174 
2175 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2176 {
2177 	u32 data, mask;
2178 
2179 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2180 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2181 
2182 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2183 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2184 
2185 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2186 					 adev->gfx.config.max_sh_per_se);
2187 
2188 	return (~data) & mask;
2189 }
2190 
2191 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2192 {
2193 	int i, j;
2194 	u32 data;
2195 	u32 active_rbs = 0;
2196 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2197 					adev->gfx.config.max_sh_per_se;
2198 
2199 	mutex_lock(&adev->grbm_idx_mutex);
2200 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2201 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2202 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2203 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2204 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2205 					       rb_bitmap_width_per_sh);
2206 		}
2207 	}
2208 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2209 	mutex_unlock(&adev->grbm_idx_mutex);
2210 
2211 	adev->gfx.config.backend_enable_mask = active_rbs;
2212 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2213 }
2214 
2215 #define DEFAULT_SH_MEM_BASES	(0x6000)
2216 #define FIRST_COMPUTE_VMID	(8)
2217 #define LAST_COMPUTE_VMID	(16)
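/*
 * Give the compute VMIDs (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1) a fixed
 * 64-bit aperture layout and clear their GDS/GWS/OA allocations; firmware
 * enables those later for the VMIDs it actually uses.
 */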
2218 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2219 {
2220 	int i;
2221 	uint32_t sh_mem_config;
2222 	uint32_t sh_mem_bases;
2223 
2224 	/*
2225 	 * Configure apertures:
2226 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2227 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2228 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2229 	 */
2230 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2231 
2232 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2233 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2234 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2235 
2236 	mutex_lock(&adev->srbm_mutex);
2237 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2238 		soc15_grbm_select(adev, 0, 0, 0, i);
2239 		/* CP and shaders */
2240 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2241 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2242 	}
2243 	soc15_grbm_select(adev, 0, 0, 0, 0);
2244 	mutex_unlock(&adev->srbm_mutex);
2245 
2246 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2247 	 * access. These should be enabled by FW for target VMIDs. */
2248 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2249 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2250 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2251 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2252 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2253 	}
2254 }
2255 
2256 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2257 {
2258 	int vmid;
2259 
2260 	/*
2261 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2262 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2263 	 * the driver can enable them for graphics. VMID0 should maintain
2264 	 * access so that HWS firmware can save/restore entries.
2265 	 */
2266 	for (vmid = 1; vmid < 16; vmid++) {
2267 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2268 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2269 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2270 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2271 	}
2272 }
2273 
2274 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2275 {
2276 	u32 tmp;
2277 	int i;
2278 
2279 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2280 
2281 	gfx_v9_0_tiling_mode_table_init(adev);
2282 
2283 	gfx_v9_0_setup_rb(adev);
2284 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2285 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2286 
2287 	/* XXX SH_MEM regs */
2288 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2289 	mutex_lock(&adev->srbm_mutex);
2290 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2291 		soc15_grbm_select(adev, 0, 0, 0, i);
2292 		/* CP and shaders */
2293 		if (i == 0) {
2294 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2295 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2296 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2297 					    !!amdgpu_noretry);
2298 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2299 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2300 		} else {
2301 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2302 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2303 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2304 					    !!amdgpu_noretry);
2305 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2306 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2307 				(adev->gmc.private_aperture_start >> 48));
2308 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2309 				(adev->gmc.shared_aperture_start >> 48));
2310 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2311 		}
2312 	}
2313 	soc15_grbm_select(adev, 0, 0, 0, 0);
2314 
2315 	mutex_unlock(&adev->srbm_mutex);
2316 
2317 	gfx_v9_0_init_compute_vmid(adev);
2318 	gfx_v9_0_init_gds_vmid(adev);
2319 }
2320 
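/*
 * Wait (bounded by usec_timeout) for the RLC serdes CU masters on every
 * SE/SH, and then the non-CU masters, to go idle.
 */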
2321 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2322 {
2323 	u32 i, j, k;
2324 	u32 mask;
2325 
2326 	mutex_lock(&adev->grbm_idx_mutex);
2327 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2328 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2329 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2330 			for (k = 0; k < adev->usec_timeout; k++) {
2331 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2332 					break;
2333 				udelay(1);
2334 			}
2335 			if (k == adev->usec_timeout) {
2336 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2337 						      0xffffffff, 0xffffffff);
2338 				mutex_unlock(&adev->grbm_idx_mutex);
2339 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2340 					 i, j);
2341 				return;
2342 			}
2343 		}
2344 	}
2345 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2346 	mutex_unlock(&adev->grbm_idx_mutex);
2347 
2348 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2349 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2350 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2351 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2352 	for (k = 0; k < adev->usec_timeout; k++) {
2353 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2354 			break;
2355 		udelay(1);
2356 	}
2357 }
2358 
2359 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2360 					       bool enable)
2361 {
2362 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2363 
2364 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2365 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2366 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2367 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2368 
2369 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2370 }
2371 
2372 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2373 {
2374 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2375 	/* csib */
2376 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2377 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2378 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2379 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2380 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2381 			adev->gfx.rlc.clear_state_size);
2382 }
2383 
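/*
 * Walk the indirect part of the RLC register_list_format table: record the
 * offset where each indirect block starts and collect the set of unique
 * indirect register offsets referenced by the list.
 */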
2384 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2385 				int indirect_offset,
2386 				int list_size,
2387 				int *unique_indirect_regs,
2388 				int unique_indirect_reg_count,
2389 				int *indirect_start_offsets,
2390 				int *indirect_start_offsets_count,
2391 				int max_start_offsets_count)
2392 {
2393 	int idx;
2394 
2395 	for (; indirect_offset < list_size; indirect_offset++) {
2396 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2397 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2398 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2399 
2400 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2401 			indirect_offset += 2;
2402 
2403 			/* look for the matching index */
2404 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2405 				if (unique_indirect_regs[idx] ==
2406 					register_list_format[indirect_offset] ||
2407 					!unique_indirect_regs[idx])
2408 					break;
2409 			}
2410 
2411 			BUG_ON(idx >= unique_indirect_reg_count);
2412 
2413 			if (!unique_indirect_regs[idx])
2414 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2415 
2416 			indirect_offset++;
2417 		}
2418 	}
2419 }
2420 
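/*
 * Upload the RLC save/restore lists: the register restore values go into the
 * RLC SRM ARAM, the register list format (direct entries plus indexed
 * indirect entries) into RLC GPM scratch, followed by the list size, the
 * per-block starting offsets and the unique indirect register index table.
 */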
2421 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2422 {
2423 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2424 	int unique_indirect_reg_count = 0;
2425 
2426 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2427 	int indirect_start_offsets_count = 0;
2428 
2429 	int list_size = 0;
2430 	int i = 0, j = 0;
2431 	u32 tmp = 0;
2432 
2433 	u32 *register_list_format =
2434 		kmemdup(adev->gfx.rlc.register_list_format,
2435 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2436 	if (!register_list_format)
2437 		return -ENOMEM;
2438 
2439 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2440 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2441 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2442 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2443 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2444 				    unique_indirect_regs,
2445 				    unique_indirect_reg_count,
2446 				    indirect_start_offsets,
2447 				    &indirect_start_offsets_count,
2448 				    ARRAY_SIZE(indirect_start_offsets));
2449 
2450 	/* enable auto inc in case it is disabled */
2451 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2452 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2453 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2454 
2455 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2456 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2457 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2458 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2459 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2460 			adev->gfx.rlc.register_restore[i]);
2461 
2462 	/* load indirect register */
2463 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2464 		adev->gfx.rlc.reg_list_format_start);
2465 
2466 	/* direct register portion */
2467 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2468 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2469 			register_list_format[i]);
2470 
2471 	/* indirect register portion */
2472 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2473 		if (register_list_format[i] == 0xFFFFFFFF) {
2474 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2475 			continue;
2476 		}
2477 
2478 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2479 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2480 
2481 		for (j = 0; j < unique_indirect_reg_count; j++) {
2482 			if (register_list_format[i] == unique_indirect_regs[j]) {
2483 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2484 				break;
2485 			}
2486 		}
2487 
2488 		BUG_ON(j >= unique_indirect_reg_count);
2489 
2490 		i++;
2491 	}
2492 
2493 	/* set save/restore list size */
2494 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2495 	list_size = list_size >> 1;
2496 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2497 		adev->gfx.rlc.reg_restore_list_size);
2498 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2499 
2500 	/* write the starting offsets to RLC scratch ram */
2501 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2502 		adev->gfx.rlc.starting_offsets_start);
2503 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2504 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2505 		       indirect_start_offsets[i]);
2506 
2507 	/* load unique indirect regs*/
2508 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2509 		if (unique_indirect_regs[i] != 0) {
2510 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2511 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2512 			       unique_indirect_regs[i] & 0x3FFFF);
2513 
2514 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2515 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2516 			       unique_indirect_regs[i] >> 20);
2517 		}
2518 	}
2519 
2520 	kfree(register_list_format);
2521 	return 0;
2522 }
2523 
2524 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2525 {
2526 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2527 }
2528 
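/*
 * Hand control of CGPG (coarse grain power gating) over to the GFX IP via
 * PWR_MISC_CNTL_STATUS, updating the GFXOFF status field as well, or give
 * that control back when disabling.
 */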
2529 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2530 					     bool enable)
2531 {
2532 	uint32_t data = 0;
2533 	uint32_t default_data = 0;
2534 
2535 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2536 	if (enable) {
2537 		/* enable GFXIP control over CGPG */
2538 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2539 		if (default_data != data)
2540 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2541 
2542 		/* update status */
2543 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2544 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2545 		if (default_data != data)
2546 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2547 	} else {
2548 		/* restore GFXIP control over CGPG */
2549 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2550 		if (default_data != data)
2551 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2552 	}
2553 }
2554 
2555 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2556 {
2557 	uint32_t data = 0;
2558 
2559 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2560 			      AMD_PG_SUPPORT_GFX_SMG |
2561 			      AMD_PG_SUPPORT_GFX_DMG)) {
2562 		/* init IDLE_POLL_COUNT = 60 */
2563 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2564 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2565 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2566 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2567 
2568 		/* init RLC PG Delay */
2569 		data = 0;
2570 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2571 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2572 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2573 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2574 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2575 
2576 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2577 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2578 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2579 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2580 
2581 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2582 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2583 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2584 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2585 
2586 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2587 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2588 
2589 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2590 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2591 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2592 
2593 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2594 	}
2595 }
2596 
2597 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2598 						bool enable)
2599 {
2600 	uint32_t data = 0;
2601 	uint32_t default_data = 0;
2602 
2603 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2604 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2605 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2606 			     enable ? 1 : 0);
2607 	if (default_data != data)
2608 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2609 }
2610 
2611 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2612 						bool enable)
2613 {
2614 	uint32_t data = 0;
2615 	uint32_t default_data = 0;
2616 
2617 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2618 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2619 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2620 			     enable ? 1 : 0);
2621 	if (default_data != data)
2622 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2623 }
2624 
2625 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2626 					bool enable)
2627 {
2628 	uint32_t data = 0;
2629 	uint32_t default_data = 0;
2630 
2631 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2632 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2633 			     CP_PG_DISABLE,
2634 			     enable ? 0 : 1);
2635 	if (default_data != data)
2636 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2637 }
2638 
2639 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2640 						bool enable)
2641 {
2642 	uint32_t data, default_data;
2643 
2644 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2645 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2646 			     GFX_POWER_GATING_ENABLE,
2647 			     enable ? 1 : 0);
2648 	if (default_data != data)
2649 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2650 }
2651 
2652 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2653 						bool enable)
2654 {
2655 	uint32_t data, default_data;
2656 
2657 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2658 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2659 			     GFX_PIPELINE_PG_ENABLE,
2660 			     enable ? 1 : 0);
2661 	if (default_data != data)
2662 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2663 
2664 	if (!enable)
2665 		/* read any GFX register to wake up GFX */
2666 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2667 }
2668 
2669 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2670 						       bool enable)
2671 {
2672 	uint32_t data, default_data;
2673 
2674 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2675 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2676 			     STATIC_PER_CU_PG_ENABLE,
2677 			     enable ? 1 : 0);
2678 	if (default_data != data)
2679 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2680 }
2681 
2682 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2683 						bool enable)
2684 {
2685 	uint32_t data, default_data;
2686 
2687 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2688 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2689 			     DYN_PER_CU_PG_ENABLE,
2690 			     enable ? 1 : 0);
2691 	if (default_data != data)
2692 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2693 }
2694 
2695 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2696 {
2697 	gfx_v9_0_init_csb(adev);
2698 
2699 	/*
2700 	 * Rlc save restore list is workable since v2_1.
2701 	 * And it's needed by gfxoff feature.
2702 	 */
2703 	if (adev->gfx.rlc.is_rlc_v2_1) {
2704 		if (adev->asic_type == CHIP_VEGA12 ||
2705 		    (adev->asic_type == CHIP_RAVEN &&
2706 		     adev->rev_id >= 8))
2707 			gfx_v9_1_init_rlc_save_restore_list(adev);
2708 		gfx_v9_0_enable_save_restore_machine(adev);
2709 	}
2710 
2711 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2712 			      AMD_PG_SUPPORT_GFX_SMG |
2713 			      AMD_PG_SUPPORT_GFX_DMG |
2714 			      AMD_PG_SUPPORT_CP |
2715 			      AMD_PG_SUPPORT_GDS |
2716 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2717 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2718 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2719 		gfx_v9_0_init_gfx_power_gating(adev);
2720 	}
2721 }
2722 
2723 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2724 {
2725 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2726 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2727 	gfx_v9_0_wait_for_rlc_serdes(adev);
2728 }
2729 
2730 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2731 {
2732 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2733 	udelay(50);
2734 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2735 	udelay(50);
2736 }
2737 
2738 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2739 {
2740 #ifdef AMDGPU_RLC_DEBUG_RETRY
2741 	u32 rlc_ucode_ver;
2742 #endif
2743 
2744 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2745 	udelay(50);
2746 
2747 	/* APUs (e.g. Carrizo) enable the CP interrupt only after CP init */
2748 	if (!(adev->flags & AMD_IS_APU)) {
2749 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2750 		udelay(50);
2751 	}
2752 
2753 #ifdef AMDGPU_RLC_DEBUG_RETRY
2754 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2755 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2756 	if (rlc_ucode_ver == 0x108) {
2757 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2758 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2759 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2760 		 * default is 0x9C4 to create a 100us interval */
2761 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2762 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2763 		 * to disable the page fault retry interrupts, default is
2764 		 * 0x100 (256) */
2765 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2766 	}
2767 #endif
2768 }
2769 
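/*
 * Legacy RLC ucode load used when firmware is not loaded through PSP: stream
 * the ucode dwords via RLC_GPM_UCODE_ADDR/DATA and finish by writing the
 * firmware version to the address register.
 */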
2770 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2771 {
2772 	const struct rlc_firmware_header_v2_0 *hdr;
2773 	const __le32 *fw_data;
2774 	unsigned i, fw_size;
2775 
2776 	if (!adev->gfx.rlc_fw)
2777 		return -EINVAL;
2778 
2779 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2780 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2781 
2782 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2783 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2784 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2785 
2786 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2787 			RLCG_UCODE_LOADING_START_ADDRESS);
2788 	for (i = 0; i < fw_size; i++)
2789 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2790 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2791 
2792 	return 0;
2793 }
2794 
2795 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2796 {
2797 	int r;
2798 
2799 	if (amdgpu_sriov_vf(adev)) {
2800 		gfx_v9_0_init_csb(adev);
2801 		return 0;
2802 	}
2803 
2804 	adev->gfx.rlc.funcs->stop(adev);
2805 
2806 	/* disable CG */
2807 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2808 
2809 	gfx_v9_0_init_pg(adev);
2810 
2811 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2812 		/* legacy rlc firmware loading */
2813 		r = gfx_v9_0_rlc_load_microcode(adev);
2814 		if (r)
2815 			return r;
2816 	}
2817 
2818 	switch (adev->asic_type) {
2819 	case CHIP_RAVEN:
2820 		if (amdgpu_lbpw == 0)
2821 			gfx_v9_0_enable_lbpw(adev, false);
2822 		else
2823 			gfx_v9_0_enable_lbpw(adev, true);
2824 		break;
2825 	case CHIP_VEGA20:
2826 		if (amdgpu_lbpw > 0)
2827 			gfx_v9_0_enable_lbpw(adev, true);
2828 		else
2829 			gfx_v9_0_enable_lbpw(adev, false);
2830 		break;
2831 	default:
2832 		break;
2833 	}
2834 
2835 	adev->gfx.rlc.funcs->start(adev);
2836 
2837 	return 0;
2838 }
2839 
2840 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2841 {
2842 	int i;
2843 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2844 
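	/* The HALT bits are active high: clearing them lets the PFP/ME/CE run,
	 * setting them halts the engines.
	 */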
2845 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2846 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2847 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2848 	if (!enable) {
2849 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2850 			adev->gfx.gfx_ring[i].sched.ready = false;
2851 	}
2852 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2853 	udelay(50);
2854 }
2855 
2856 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2857 {
2858 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2859 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2860 	const struct gfx_firmware_header_v1_0 *me_hdr;
2861 	const __le32 *fw_data;
2862 	unsigned i, fw_size;
2863 
2864 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2865 		return -EINVAL;
2866 
2867 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2868 		adev->gfx.pfp_fw->data;
2869 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2870 		adev->gfx.ce_fw->data;
2871 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2872 		adev->gfx.me_fw->data;
2873 
2874 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2875 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2876 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2877 
2878 	gfx_v9_0_cp_gfx_enable(adev, false);
2879 
2880 	/* PFP */
2881 	fw_data = (const __le32 *)
2882 		(adev->gfx.pfp_fw->data +
2883 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2884 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2885 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2886 	for (i = 0; i < fw_size; i++)
2887 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2888 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2889 
2890 	/* CE */
2891 	fw_data = (const __le32 *)
2892 		(adev->gfx.ce_fw->data +
2893 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2894 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2895 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2896 	for (i = 0; i < fw_size; i++)
2897 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2898 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2899 
2900 	/* ME */
2901 	fw_data = (const __le32 *)
2902 		(adev->gfx.me_fw->data +
2903 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2904 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2905 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2906 	for (i = 0; i < fw_size; i++)
2907 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2908 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2909 
2910 	return 0;
2911 }
2912 
2913 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2914 {
2915 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2916 	const struct cs_section_def *sect = NULL;
2917 	const struct cs_extent_def *ext = NULL;
2918 	int r, i, tmp;
2919 
2920 	/* init the CP */
2921 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2922 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2923 
2924 	gfx_v9_0_cp_gfx_enable(adev, true);
2925 
2926 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2927 	if (r) {
2928 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2929 		return r;
2930 	}
2931 
2932 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2933 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2934 
2935 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2936 	amdgpu_ring_write(ring, 0x80000000);
2937 	amdgpu_ring_write(ring, 0x80000000);
2938 
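	/* Emit the gfx9 clear-state (default context) register values inline so
	 * the CP can initialize the context registers.
	 */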
2939 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2940 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2941 			if (sect->id == SECT_CONTEXT) {
2942 				amdgpu_ring_write(ring,
2943 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2944 					       ext->reg_count));
2945 				amdgpu_ring_write(ring,
2946 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2947 				for (i = 0; i < ext->reg_count; i++)
2948 					amdgpu_ring_write(ring, ext->extent[i]);
2949 			}
2950 		}
2951 	}
2952 
2953 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2954 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2955 
2956 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2957 	amdgpu_ring_write(ring, 0);
2958 
2959 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2960 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2961 	amdgpu_ring_write(ring, 0x8000);
2962 	amdgpu_ring_write(ring, 0x8000);
2963 
2964 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2965 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2966 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2967 	amdgpu_ring_write(ring, tmp);
2968 	amdgpu_ring_write(ring, 0);
2969 
2970 	amdgpu_ring_commit(ring);
2971 
2972 	return 0;
2973 }
2974 
2975 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2976 {
2977 	struct amdgpu_ring *ring;
2978 	u32 tmp;
2979 	u32 rb_bufsz;
2980 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2981 
2982 	/* Set the write pointer delay */
2983 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2984 
2985 	/* set the RB to use vmid 0 */
2986 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2987 
2988 	/* Set ring buffer size */
2989 	ring = &adev->gfx.gfx_ring[0];
2990 	rb_bufsz = order_base_2(ring->ring_size / 8);
2991 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2992 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2993 #ifdef __BIG_ENDIAN
2994 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2995 #endif
2996 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2997 
2998 	/* Initialize the ring buffer's write pointers */
2999 	ring->wptr = 0;
3000 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3001 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3002 
3003 	/* set the wb address whether it's enabled or not */
3004 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3005 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3006 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3007 
3008 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3009 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3010 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3011 
3012 	mdelay(1);
3013 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3014 
3015 	rb_addr = ring->gpu_addr >> 8;
3016 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3017 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3018 
3019 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3020 	if (ring->use_doorbell) {
3021 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3022 				    DOORBELL_OFFSET, ring->doorbell_index);
3023 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3024 				    DOORBELL_EN, 1);
3025 	} else {
3026 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3027 	}
3028 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3029 
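	/* Program the doorbell range the CP decodes for the gfx ring buffer. */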
3030 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3031 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3032 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3033 
3034 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3035 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3036 
3037 
3038 	/* start the ring */
3039 	gfx_v9_0_cp_gfx_start(adev);
3040 	ring->sched.ready = true;
3041 
3042 	return 0;
3043 }
3044 
3045 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3046 {
3047 	int i;
3048 
3049 	if (enable) {
3050 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3051 	} else {
3052 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3053 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3054 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3055 			adev->gfx.compute_ring[i].sched.ready = false;
3056 		adev->gfx.kiq.ring.sched.ready = false;
3057 	}
3058 	udelay(50);
3059 }
3060 
3061 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3062 {
3063 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3064 	const __le32 *fw_data;
3065 	unsigned i;
3066 	u32 tmp;
3067 
3068 	if (!adev->gfx.mec_fw)
3069 		return -EINVAL;
3070 
3071 	gfx_v9_0_cp_compute_enable(adev, false);
3072 
3073 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3074 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3075 
3076 	fw_data = (const __le32 *)
3077 		(adev->gfx.mec_fw->data +
3078 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3079 	tmp = 0;
3080 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3081 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3082 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3083 
3084 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3085 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3086 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3087 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3088 
3089 	/* MEC1 */
3090 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3091 			 mec_hdr->jt_offset);
3092 	for (i = 0; i < mec_hdr->jt_size; i++)
3093 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3094 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3095 
3096 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3097 			adev->gfx.mec_fw_version);
3098 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1. */
3099 
3100 	return 0;
3101 }
3102 
3103 /* KIQ functions */
3104 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3105 {
3106 	uint32_t tmp;
3107 	struct amdgpu_device *adev = ring->adev;
3108 
3109 	/* tell RLC which is KIQ queue */
3110 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3111 	tmp &= 0xffffff00;
3112 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3113 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
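	/* Write the queue id with the low byte's bit 7 clear first, then set
	 * bit 7 (presumably the "active" flag) in a second write so the RLC
	 * sees the new KIQ selection before it is marked active.
	 */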
3114 	tmp |= 0x80;
3115 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3116 }
3117 
3118 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3119 {
3120 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3121 	uint64_t queue_mask = 0;
3122 	int r, i;
3123 
3124 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3125 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3126 			continue;
3127 
3128 		/* This situation may be hit in the future if a new HW
3129 		 * generation exposes more than 64 queues. If so, the
3130 		 * definition of queue_mask needs updating */
3131 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3132 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3133 			break;
3134 		}
3135 
3136 		queue_mask |= (1ull << i);
3137 	}
3138 
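	/* 8 dwords for the SET_RESOURCES packet plus 7 dwords for each
	 * MAP_QUEUES packet emitted below.
	 */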
3139 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3140 	if (r) {
3141 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3142 		return r;
3143 	}
3144 
3145 	/* set resources */
3146 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3147 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3148 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3149 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3150 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3151 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3152 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3153 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3154 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3155 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3156 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3157 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3158 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3159 
3160 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3161 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3162 		amdgpu_ring_write(kiq_ring,
3163 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3164 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3165 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3166 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3167 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3168 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3169 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3170 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3171 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3172 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3173 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3174 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3175 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3176 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3177 	}
3178 
3179 	r = amdgpu_ring_test_helper(kiq_ring);
3180 	if (r)
3181 		DRM_ERROR("KCQ enable failed\n");
3182 
3183 	return r;
3184 }
3185 
3186 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3187 {
3188 	struct amdgpu_device *adev = ring->adev;
3189 	struct v9_mqd *mqd = ring->mqd_ptr;
3190 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3191 	uint32_t tmp;
3192 
3193 	mqd->header = 0xC0310800;
3194 	mqd->compute_pipelinestat_enable = 0x00000001;
3195 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3196 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3197 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3198 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3199 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3200 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3201 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3202 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3203 	mqd->compute_misc_reserved = 0x00000003;
3204 
3205 	mqd->dynamic_cu_mask_addr_lo =
3206 		lower_32_bits(ring->mqd_gpu_addr
3207 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3208 	mqd->dynamic_cu_mask_addr_hi =
3209 		upper_32_bits(ring->mqd_gpu_addr
3210 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3211 
3212 	eop_base_addr = ring->eop_gpu_addr >> 8;
3213 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3214 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3215 
3216 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3217 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3218 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3219 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3220 
3221 	mqd->cp_hqd_eop_control = tmp;
3222 
3223 	/* enable doorbell? */
3224 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3225 
3226 	if (ring->use_doorbell) {
3227 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3228 				    DOORBELL_OFFSET, ring->doorbell_index);
3229 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3230 				    DOORBELL_EN, 1);
3231 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3232 				    DOORBELL_SOURCE, 0);
3233 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3234 				    DOORBELL_HIT, 0);
3235 	} else {
3236 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3237 					 DOORBELL_EN, 0);
3238 	}
3239 
3240 	mqd->cp_hqd_pq_doorbell_control = tmp;
3241 
3242 	/* disable the queue if it's active */
3243 	ring->wptr = 0;
3244 	mqd->cp_hqd_dequeue_request = 0;
3245 	mqd->cp_hqd_pq_rptr = 0;
3246 	mqd->cp_hqd_pq_wptr_lo = 0;
3247 	mqd->cp_hqd_pq_wptr_hi = 0;
3248 
3249 	/* set the pointer to the MQD */
3250 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3251 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3252 
3253 	/* set MQD vmid to 0 */
3254 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3255 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3256 	mqd->cp_mqd_control = tmp;
3257 
3258 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3259 	hqd_gpu_addr = ring->gpu_addr >> 8;
3260 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3261 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3262 
3263 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3264 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3265 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3266 			    (order_base_2(ring->ring_size / 4) - 1));
3267 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3268 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3269 #ifdef __BIG_ENDIAN
3270 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3271 #endif
3272 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3273 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3274 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3275 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3276 	mqd->cp_hqd_pq_control = tmp;
3277 
3278 	/* set the wb address whether it's enabled or not */
3279 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3280 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3281 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3282 		upper_32_bits(wb_gpu_addr) & 0xffff;
3283 
3284 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3285 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3286 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3287 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3288 
3289 	tmp = 0;
3290 	/* enable the doorbell if requested */
3291 	if (ring->use_doorbell) {
3292 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3293 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3294 				DOORBELL_OFFSET, ring->doorbell_index);
3295 
3296 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3297 					 DOORBELL_EN, 1);
3298 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3299 					 DOORBELL_SOURCE, 0);
3300 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3301 					 DOORBELL_HIT, 0);
3302 	}
3303 
3304 	mqd->cp_hqd_pq_doorbell_control = tmp;
3305 
3306 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3307 	ring->wptr = 0;
3308 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3309 
3310 	/* set the vmid for the queue */
3311 	mqd->cp_hqd_vmid = 0;
3312 
3313 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3314 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3315 	mqd->cp_hqd_persistent_state = tmp;
3316 
3317 	/* set MIN_IB_AVAIL_SIZE */
3318 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3319 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3320 	mqd->cp_hqd_ib_control = tmp;
3321 
3322 	/* activate the queue */
3323 	mqd->cp_hqd_active = 1;
3324 
3325 	return 0;
3326 }
3327 
3328 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3329 {
3330 	struct amdgpu_device *adev = ring->adev;
3331 	struct v9_mqd *mqd = ring->mqd_ptr;
3332 	int j;
3333 
3334 	/* disable wptr polling */
3335 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3336 
3337 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3338 	       mqd->cp_hqd_eop_base_addr_lo);
3339 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3340 	       mqd->cp_hqd_eop_base_addr_hi);
3341 
3342 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3343 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3344 	       mqd->cp_hqd_eop_control);
3345 
3346 	/* enable doorbell? */
3347 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3348 	       mqd->cp_hqd_pq_doorbell_control);
3349 
3350 	/* disable the queue if it's active */
3351 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3352 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3353 		for (j = 0; j < adev->usec_timeout; j++) {
3354 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3355 				break;
3356 			udelay(1);
3357 		}
3358 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3359 		       mqd->cp_hqd_dequeue_request);
3360 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3361 		       mqd->cp_hqd_pq_rptr);
3362 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3363 		       mqd->cp_hqd_pq_wptr_lo);
3364 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3365 		       mqd->cp_hqd_pq_wptr_hi);
3366 	}
3367 
3368 	/* set the pointer to the MQD */
3369 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3370 	       mqd->cp_mqd_base_addr_lo);
3371 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3372 	       mqd->cp_mqd_base_addr_hi);
3373 
3374 	/* set MQD vmid to 0 */
3375 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3376 	       mqd->cp_mqd_control);
3377 
3378 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3379 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3380 	       mqd->cp_hqd_pq_base_lo);
3381 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3382 	       mqd->cp_hqd_pq_base_hi);
3383 
3384 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3385 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3386 	       mqd->cp_hqd_pq_control);
3387 
3388 	/* set the wb address whether it's enabled or not */
3389 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3390 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3391 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3392 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3393 
3394 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3395 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3396 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3397 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3398 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3399 
3400 	/* enable the doorbell if requested */
3401 	if (ring->use_doorbell) {
3402 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3403 					(adev->doorbell_index.kiq * 2) << 2);
3404 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3405 					(adev->doorbell_index.userqueue_end * 2) << 2);
3406 	}
3407 
3408 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3409 	       mqd->cp_hqd_pq_doorbell_control);
3410 
3411 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3412 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3413 	       mqd->cp_hqd_pq_wptr_lo);
3414 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3415 	       mqd->cp_hqd_pq_wptr_hi);
3416 
3417 	/* set the vmid for the queue */
3418 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3419 
3420 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3421 	       mqd->cp_hqd_persistent_state);
3422 
3423 	/* activate the queue */
3424 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3425 	       mqd->cp_hqd_active);
3426 
3427 	if (ring->use_doorbell)
3428 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3429 
3430 	return 0;
3431 }
3432 
3433 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3434 {
3435 	struct amdgpu_device *adev = ring->adev;
3436 	int j;
3437 
3438 	/* disable the queue if it's active */
3439 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3440 
3441 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3442 
3443 		for (j = 0; j < adev->usec_timeout; j++) {
3444 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3445 				break;
3446 			udelay(1);
3447 		}
3448 
3449 		if (j == adev->usec_timeout) {
3450 			DRM_DEBUG("KIQ dequeue request failed.\n");
3451 
3452 			/* Manual disable if dequeue request times out */
3453 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3454 		}
3455 
3456 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3457 		      0);
3458 	}
3459 
3460 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3462 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
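	/* 0x40000000 appears to correspond to the DOORBELL_HIT bit; writing it
	 * first should clear any pending doorbell state before the control
	 * register is zeroed (assumption based on the register layout).
	 */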
3463 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3464 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3465 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3466 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3468 
3469 	return 0;
3470 }
3471 
3472 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3473 {
3474 	struct amdgpu_device *adev = ring->adev;
3475 	struct v9_mqd *mqd = ring->mqd_ptr;
3476 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3477 
3478 	gfx_v9_0_kiq_setting(ring);
3479 
3480 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3481 		/* reset MQD to a clean status */
3482 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3483 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3484 
3485 		/* reset ring buffer */
3486 		ring->wptr = 0;
3487 		amdgpu_ring_clear_ring(ring);
3488 
3489 		mutex_lock(&adev->srbm_mutex);
3490 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3491 		gfx_v9_0_kiq_init_register(ring);
3492 		soc15_grbm_select(adev, 0, 0, 0, 0);
3493 		mutex_unlock(&adev->srbm_mutex);
3494 	} else {
3495 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3496 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3497 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3498 		mutex_lock(&adev->srbm_mutex);
3499 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3500 		gfx_v9_0_mqd_init(ring);
3501 		gfx_v9_0_kiq_init_register(ring);
3502 		soc15_grbm_select(adev, 0, 0, 0, 0);
3503 		mutex_unlock(&adev->srbm_mutex);
3504 
3505 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3506 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3507 	}
3508 
3509 	return 0;
3510 }
3511 
3512 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3513 {
3514 	struct amdgpu_device *adev = ring->adev;
3515 	struct v9_mqd *mqd = ring->mqd_ptr;
3516 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3517 
3518 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3519 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3520 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3521 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3522 		mutex_lock(&adev->srbm_mutex);
3523 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3524 		gfx_v9_0_mqd_init(ring);
3525 		soc15_grbm_select(adev, 0, 0, 0, 0);
3526 		mutex_unlock(&adev->srbm_mutex);
3527 
3528 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3529 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3530 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3531 		/* reset MQD to a clean status */
3532 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3533 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3534 
3535 		/* reset ring buffer */
3536 		ring->wptr = 0;
3537 		amdgpu_ring_clear_ring(ring);
3538 	} else {
3539 		amdgpu_ring_clear_ring(ring);
3540 	}
3541 
3542 	return 0;
3543 }
3544 
3545 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3546 {
3547 	struct amdgpu_ring *ring;
3548 	int r;
3549 
3550 	ring = &adev->gfx.kiq.ring;
3551 
3552 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3553 	if (unlikely(r != 0))
3554 		return r;
3555 
3556 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3557 	if (unlikely(r != 0))
3558 		return r;
3559 
3560 	gfx_v9_0_kiq_init_queue(ring);
3561 	amdgpu_bo_kunmap(ring->mqd_obj);
3562 	ring->mqd_ptr = NULL;
3563 	amdgpu_bo_unreserve(ring->mqd_obj);
3564 	ring->sched.ready = true;
3565 	return 0;
3566 }
3567 
3568 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3569 {
3570 	struct amdgpu_ring *ring = NULL;
3571 	int r = 0, i;
3572 
3573 	gfx_v9_0_cp_compute_enable(adev, true);
3574 
3575 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3576 		ring = &adev->gfx.compute_ring[i];
3577 
3578 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3579 		if (unlikely(r != 0))
3580 			goto done;
3581 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3582 		if (!r) {
3583 			r = gfx_v9_0_kcq_init_queue(ring);
3584 			amdgpu_bo_kunmap(ring->mqd_obj);
3585 			ring->mqd_ptr = NULL;
3586 		}
3587 		amdgpu_bo_unreserve(ring->mqd_obj);
3588 		if (r)
3589 			goto done;
3590 	}
3591 
3592 	r = gfx_v9_0_kiq_kcq_enable(adev);
3593 done:
3594 	return r;
3595 }
3596 
3597 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3598 {
3599 	int r, i;
3600 	struct amdgpu_ring *ring;
3601 
3602 	if (!(adev->flags & AMD_IS_APU))
3603 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3604 
3605 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3606 		if (adev->asic_type != CHIP_ARCTURUS) {
3607 			/* legacy firmware loading */
3608 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3609 			if (r)
3610 				return r;
3611 		}
3612 
3613 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3614 		if (r)
3615 			return r;
3616 	}
3617 
3618 	r = gfx_v9_0_kiq_resume(adev);
3619 	if (r)
3620 		return r;
3621 
3622 	if (adev->asic_type != CHIP_ARCTURUS) {
3623 		r = gfx_v9_0_cp_gfx_resume(adev);
3624 		if (r)
3625 			return r;
3626 	}
3627 
3628 	r = gfx_v9_0_kcq_resume(adev);
3629 	if (r)
3630 		return r;
3631 
3632 	if (adev->asic_type != CHIP_ARCTURUS) {
3633 		ring = &adev->gfx.gfx_ring[0];
3634 		r = amdgpu_ring_test_helper(ring);
3635 		if (r)
3636 			return r;
3637 	}
3638 
3639 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3640 		ring = &adev->gfx.compute_ring[i];
3641 		amdgpu_ring_test_helper(ring);
3642 	}
3643 
3644 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3645 
3646 	return 0;
3647 }
3648 
3649 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3650 {
3651 	if (adev->asic_type != CHIP_ARCTURUS)
3652 		gfx_v9_0_cp_gfx_enable(adev, enable);
3653 	gfx_v9_0_cp_compute_enable(adev, enable);
3654 }
3655 
3656 static int gfx_v9_0_hw_init(void *handle)
3657 {
3658 	int r;
3659 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3660 
3661 	if (!amdgpu_sriov_vf(adev))
3662 		gfx_v9_0_init_golden_registers(adev);
3663 
3664 	gfx_v9_0_constants_init(adev);
3665 
3666 	r = adev->gfx.rlc.funcs->resume(adev);
3667 	if (r)
3668 		return r;
3669 
3670 	r = gfx_v9_0_cp_resume(adev);
3671 	if (r)
3672 		return r;
3673 
3674 	return r;
3675 }
3676 
3677 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3678 {
3679 	int r, i;
3680 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3681 
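	/* Each UNMAP_QUEUES packet emitted below is 6 dwords (header plus 5
	 * payload dwords).
	 */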
3682 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3683 	if (r)
3684 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3685 
3686 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3687 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3688 
3689 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3690 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3691 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3692 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3693 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3694 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3695 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3696 		amdgpu_ring_write(kiq_ring, 0);
3697 		amdgpu_ring_write(kiq_ring, 0);
3698 		amdgpu_ring_write(kiq_ring, 0);
3699 	}
3700 	r = amdgpu_ring_test_helper(kiq_ring);
3701 	if (r)
3702 		DRM_ERROR("KCQ disable failed\n");
3703 
3704 	return r;
3705 }
3706 
3707 static int gfx_v9_0_hw_fini(void *handle)
3708 {
3709 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3710 
3711 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3712 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3713 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3714 
3715 	/* DF freeze and KCQ disable will fail if a RAS interrupt has been triggered */
3716 	if (!amdgpu_ras_intr_triggered())
3717 		/* disable KCQ so the CPC stops touching memory that is about to become invalid */
3718 		gfx_v9_0_kcq_disable(adev);
3719 
3720 	if (amdgpu_sriov_vf(adev)) {
3721 		gfx_v9_0_cp_gfx_enable(adev, false);
3722 		/* must disable wptr polling for SRIOV once the hw is finished,
3723 		 * otherwise the CPC engine may keep fetching the WB address,
3724 		 * which is already invalid after the sw teardown, and trigger
3725 		 * DMAR read errors on the hypervisor side.
3726 		 */
3727 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3728 		return 0;
3729 	}
3730 
3731 	/* Use the deinitialize sequence from CAIL when unbinding the device
3732 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3733 	 */
3734 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3735 		mutex_lock(&adev->srbm_mutex);
3736 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3737 				adev->gfx.kiq.ring.pipe,
3738 				adev->gfx.kiq.ring.queue, 0);
3739 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3740 		soc15_grbm_select(adev, 0, 0, 0, 0);
3741 		mutex_unlock(&adev->srbm_mutex);
3742 	}
3743 
3744 	gfx_v9_0_cp_enable(adev, false);
3745 	adev->gfx.rlc.funcs->stop(adev);
3746 
3747 	return 0;
3748 }
3749 
3750 static int gfx_v9_0_suspend(void *handle)
3751 {
3752 	return gfx_v9_0_hw_fini(handle);
3753 }
3754 
3755 static int gfx_v9_0_resume(void *handle)
3756 {
3757 	return gfx_v9_0_hw_init(handle);
3758 }
3759 
3760 static bool gfx_v9_0_is_idle(void *handle)
3761 {
3762 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3763 
3764 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3765 				GRBM_STATUS, GUI_ACTIVE))
3766 		return false;
3767 	else
3768 		return true;
3769 }
3770 
3771 static int gfx_v9_0_wait_for_idle(void *handle)
3772 {
3773 	unsigned i;
3774 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3775 
3776 	for (i = 0; i < adev->usec_timeout; i++) {
3777 		if (gfx_v9_0_is_idle(handle))
3778 			return 0;
3779 		udelay(1);
3780 	}
3781 	return -ETIMEDOUT;
3782 }
3783 
3784 static int gfx_v9_0_soft_reset(void *handle)
3785 {
3786 	u32 grbm_soft_reset = 0;
3787 	u32 tmp;
3788 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3789 
3790 	/* GRBM_STATUS */
3791 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3792 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3793 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3794 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3795 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3796 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3797 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3798 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3799 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3800 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3801 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3802 	}
3803 
3804 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3805 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3806 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3807 	}
3808 
3809 	/* GRBM_STATUS2 */
3810 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3811 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3812 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3813 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3814 
3815 
3816 	if (grbm_soft_reset) {
3817 		/* stop the rlc */
3818 		adev->gfx.rlc.funcs->stop(adev);
3819 
3820 		if (adev->asic_type != CHIP_ARCTURUS)
3821 			/* Disable GFX parsing/prefetching */
3822 			gfx_v9_0_cp_gfx_enable(adev, false);
3823 
3824 		/* Disable MEC parsing/prefetching */
3825 		gfx_v9_0_cp_compute_enable(adev, false);
3826 
3827 		if (grbm_soft_reset) {
3828 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3829 			tmp |= grbm_soft_reset;
3830 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3831 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3832 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3833 
3834 			udelay(50);
3835 
3836 			tmp &= ~grbm_soft_reset;
3837 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3838 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3839 		}
3840 
3841 		/* Wait a little for things to settle down */
3842 		udelay(50);
3843 	}
3844 	return 0;
3845 }
3846 
3847 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3848 {
3849 	uint64_t clock;
3850 
3851 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3852 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3853 		uint32_t tmp, lsb, msb, i = 0;
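		/* Read MSB, LSB, then MSB again and retry if the MSB changed,
		 * to guard against a carry between the two 32-bit reads.
		 */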
3854 		do {
3855 			if (i != 0)
3856 				udelay(1);
3857 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3858 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3859 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3860 			i++;
3861 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3862 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3863 	} else {
3864 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3865 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3866 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3867 	}
3868 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3869 	return clock;
3870 }
3871 
3872 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3873 					  uint32_t vmid,
3874 					  uint32_t gds_base, uint32_t gds_size,
3875 					  uint32_t gws_base, uint32_t gws_size,
3876 					  uint32_t oa_base, uint32_t oa_size)
3877 {
3878 	struct amdgpu_device *adev = ring->adev;
3879 
3880 	/* GDS Base */
3881 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3882 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3883 				   gds_base);
3884 
3885 	/* GDS Size */
3886 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3887 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3888 				   gds_size);
3889 
3890 	/* GWS */
3891 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3892 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3893 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3894 
3895 	/* OA */
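	/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask of oa_size
	 * consecutive OA bits starting at oa_base.
	 */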
3896 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3897 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3898 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3899 }
3900 
3901 static const u32 vgpr_init_compute_shader[] =
3902 {
3903 	0xb07c0000, 0xbe8000ff,
3904 	0x000000f8, 0xbf110800,
3905 	0x7e000280, 0x7e020280,
3906 	0x7e040280, 0x7e060280,
3907 	0x7e080280, 0x7e0a0280,
3908 	0x7e0c0280, 0x7e0e0280,
3909 	0x80808800, 0xbe803200,
3910 	0xbf84fff5, 0xbf9c0000,
3911 	0xd28c0001, 0x0001007f,
3912 	0xd28d0001, 0x0002027e,
3913 	0x10020288, 0xb8810904,
3914 	0xb7814000, 0xd1196a01,
3915 	0x00000301, 0xbe800087,
3916 	0xbefc00c1, 0xd89c4000,
3917 	0x00020201, 0xd89cc080,
3918 	0x00040401, 0x320202ff,
3919 	0x00000800, 0x80808100,
3920 	0xbf84fff8, 0x7e020280,
3921 	0xbf810000, 0x00000000,
3922 };
3923 
3924 static const u32 sgpr_init_compute_shader[] =
3925 {
3926 	0xb07c0000, 0xbe8000ff,
3927 	0x0000005f, 0xbee50080,
3928 	0xbe812c65, 0xbe822c65,
3929 	0xbe832c65, 0xbe842c65,
3930 	0xbe852c65, 0xb77c0005,
3931 	0x80808500, 0xbf84fff8,
3932 	0xbe800080, 0xbf810000,
3933 };
3934 
3935 /* When the register arrays below are changed, please update gpr_reg_size
3936  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
3937  * to cover all gfx9 ASICs. */
3938 static const struct soc15_reg_entry vgpr_init_regs[] = {
3939    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3940    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3941    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
3942    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3943    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
3944    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3945    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3946    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3947    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3948    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3949    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
3950    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
3951    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
3952    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
3953 };
3954 
3955 static const struct soc15_reg_entry sgpr1_init_regs[] = {
3956    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3957    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3958    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
3959    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3960    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
3961    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3962    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
3963    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
3964    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
3965    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
3966    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
3967    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
3968    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
3969    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
3970 };
3971 
3972 static const struct soc15_reg_entry sgpr2_init_regs[] = {
3973    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3974    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3975    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
3976    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3977    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
3978    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3979    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
3980    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
3981    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
3982    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
3983    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
3984    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
3987 };
3988 
3989 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3990    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3991    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3992    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3993    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3994    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3995    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3996    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3997    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3998    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3999    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4000    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4001    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4002    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4003    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4004    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4005    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4006    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4007    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4008    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4009    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4010    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4011    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4012    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4013    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4014    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4015    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4016    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4017    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4018    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4019    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4020    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4021    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4022    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4023    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4024 };
4025 
4026 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4027 {
4028 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4029 	int i, r;
4030 
4031 	/* only supported when RAS is enabled */
4032 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4033 		return 0;
4034 
4035 	r = amdgpu_ring_alloc(ring, 7);
4036 	if (r) {
4037 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4038 			ring->name, r);
4039 		return r;
4040 	}
4041 
4042 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4043 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4044 
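	/* DMA a constant fill into GDS (dst_sel 1 = GDS, src_sel 2 = data) to
	 * touch the whole GDS range; this is presumably what initializes the
	 * GDS ECC state for the workaround.
	 */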
4045 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4046 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4047 				PACKET3_DMA_DATA_DST_SEL(1) |
4048 				PACKET3_DMA_DATA_SRC_SEL(2) |
4049 				PACKET3_DMA_DATA_ENGINE(0)));
4050 	amdgpu_ring_write(ring, 0);
4051 	amdgpu_ring_write(ring, 0);
4052 	amdgpu_ring_write(ring, 0);
4053 	amdgpu_ring_write(ring, 0);
4054 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4055 				adev->gds.gds_size);
4056 
4057 	amdgpu_ring_commit(ring);
4058 
4059 	for (i = 0; i < adev->usec_timeout; i++) {
4060 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4061 			break;
4062 		udelay(1);
4063 	}
4064 
4065 	if (i >= adev->usec_timeout)
4066 		r = -ETIMEDOUT;
4067 
4068 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4069 
4070 	return r;
4071 }
4072 
4073 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4074 {
4075 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4076 	struct amdgpu_ib ib;
4077 	struct dma_fence *f = NULL;
4078 	int r, i, j, k;
4079 	unsigned total_size, vgpr_offset, sgpr_offset;
4080 	u64 gpu_addr;
4081 
4082 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4083 						adev->gfx.config.max_cu_per_sh *
4084 						adev->gfx.config.max_sh_per_se;
4085 	int sgpr_work_group_size = 5;
4086 	int gpr_reg_size = compute_dim_x / 16 + 6;
4087 
4088 	/* only supported when RAS is enabled */
4089 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4090 		return 0;
4091 
4092 	/* bail if the compute ring is not ready */
4093 	if (!ring->sched.ready)
4094 		return 0;
4095 
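	/* Each of the three dispatches below emits gpr_reg_size SET_SH_REG
	 * writes (3 dwords each), a 4-dword SET_SH_REG for COMPUTE_PGM_LO/HI,
	 * a 5-dword DISPATCH_DIRECT and a 2-dword EVENT_WRITE; the trailing
	 * "* 4" converts dwords to bytes.
	 */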
4096 	total_size =
4097 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4098 	total_size +=
4099 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4100 	total_size +=
4101 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4102 	total_size = ALIGN(total_size, 256);
4103 	vgpr_offset = total_size;
4104 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4105 	sgpr_offset = total_size;
4106 	total_size += sizeof(sgpr_init_compute_shader);
4107 
4108 	/* allocate an indirect buffer to put the commands in */
4109 	memset(&ib, 0, sizeof(ib));
4110 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4111 	if (r) {
4112 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4113 		return r;
4114 	}
4115 
4116 	/* load the compute shaders */
4117 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4118 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4119 
4120 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4121 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4122 
4123 	/* init the ib length to 0 */
4124 	ib.length_dw = 0;
4125 
4126 	/* VGPR */
4127 	/* write the register state for the compute dispatch */
4128 	for (i = 0; i < gpr_reg_size; i++) {
4129 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4130 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4131 								- PACKET3_SET_SH_REG_START;
4132 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4133 	}
4134 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4135 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4136 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4137 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4138 							- PACKET3_SET_SH_REG_START;
4139 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4140 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4141 
4142 	/* write dispatch packet */
4143 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4144 	ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4145 	ib.ptr[ib.length_dw++] = 1; /* y */
4146 	ib.ptr[ib.length_dw++] = 1; /* z */
4147 	ib.ptr[ib.length_dw++] =
4148 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4149 
4150 	/* write CS partial flush packet */
4151 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4152 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4153 
4154 	/* SGPR1 */
4155 	/* write the register state for the compute dispatch */
4156 	for (i = 0; i < gpr_reg_size; i++) {
4157 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4158 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4159 								- PACKET3_SET_SH_REG_START;
4160 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4161 	}
4162 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4163 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4164 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4165 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4166 							- PACKET3_SET_SH_REG_START;
4167 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4168 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4169 
4170 	/* write dispatch packet */
4171 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4172 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4173 	ib.ptr[ib.length_dw++] = 1; /* y */
4174 	ib.ptr[ib.length_dw++] = 1; /* z */
4175 	ib.ptr[ib.length_dw++] =
4176 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4177 
4178 	/* write CS partial flush packet */
4179 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4180 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4181 
4182 	/* SGPR2 */
4183 	/* write the register state for the compute dispatch */
4184 	for (i = 0; i < gpr_reg_size; i++) {
4185 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4186 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4187 								- PACKET3_SET_SH_REG_START;
4188 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4189 	}
4190 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4191 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4192 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4193 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4194 							- PACKET3_SET_SH_REG_START;
4195 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4196 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4197 
4198 	/* write dispatch packet */
4199 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4200 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4201 	ib.ptr[ib.length_dw++] = 1; /* y */
4202 	ib.ptr[ib.length_dw++] = 1; /* z */
4203 	ib.ptr[ib.length_dw++] =
4204 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4205 
4206 	/* write CS partial flush packet */
4207 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4208 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4209 
4210 	/* schedule the ib on the ring */
4211 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4212 	if (r) {
4213 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4214 		goto fail;
4215 	}
4216 
4217 	/* wait for the GPU to finish processing the IB */
4218 	r = dma_fence_wait(f, false);
4219 	if (r) {
4220 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4221 		goto fail;
4222 	}
4223 
4224 	/* read back registers to clear the counters */
4225 	mutex_lock(&adev->grbm_idx_mutex);
4226 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4227 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4228 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4229 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4230 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4231 			}
4232 		}
4233 	}
4234 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4235 	mutex_unlock(&adev->grbm_idx_mutex);
4236 
4237 fail:
4238 	amdgpu_ib_free(adev, &ib, NULL);
4239 	dma_fence_put(f);
4240 
4241 	return r;
4242 }
4243 
4244 static int gfx_v9_0_early_init(void *handle)
4245 {
4246 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4247 
4248 	if (adev->asic_type == CHIP_ARCTURUS)
4249 		adev->gfx.num_gfx_rings = 0;
4250 	else
4251 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4252 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4253 	gfx_v9_0_set_ring_funcs(adev);
4254 	gfx_v9_0_set_irq_funcs(adev);
4255 	gfx_v9_0_set_gds_init(adev);
4256 	gfx_v9_0_set_rlc_funcs(adev);
4257 
4258 	return 0;
4259 }
4260 
4261 static int gfx_v9_0_ecc_late_init(void *handle)
4262 {
4263 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4264 	int r;
4265 
4266 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4267 	if (r)
4268 		return r;
4269 
4270 	/* requires IBs so do in late init after IB pool is initialized */
4271 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4272 	if (r)
4273 		return r;
4274 
4275 	r = amdgpu_gfx_ras_late_init(adev);
4276 	if (r)
4277 		return r;
4278 
4279 	return 0;
4280 }
4281 
4282 static int gfx_v9_0_late_init(void *handle)
4283 {
4284 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4285 	int r;
4286 
4287 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4288 	if (r)
4289 		return r;
4290 
4291 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4292 	if (r)
4293 		return r;
4294 
4295 	r = gfx_v9_0_ecc_late_init(handle);
4296 	if (r)
4297 		return r;
4298 
4299 	return 0;
4300 }
4301 
4302 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4303 {
4304 	uint32_t rlc_setting;
4305 
4306 	/* if RLC is not enabled, do nothing */
4307 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4308 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4309 		return false;
4310 
4311 	return true;
4312 }
4313 
4314 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4315 {
4316 	uint32_t data;
4317 	unsigned i;
4318 
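	/* Request safe mode (MESSAGE = 1) and wait for the RLC to acknowledge
	 * by clearing the CMD field.
	 */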
4319 	data = RLC_SAFE_MODE__CMD_MASK;
4320 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4321 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4322 
4323 	/* wait for RLC_SAFE_MODE */
4324 	for (i = 0; i < adev->usec_timeout; i++) {
4325 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4326 			break;
4327 		udelay(1);
4328 	}
4329 }
4330 
4331 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4332 {
4333 	uint32_t data;
4334 
4335 	data = RLC_SAFE_MODE__CMD_MASK;
4336 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4337 }
4338 
4339 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4340 						bool enable)
4341 {
4342 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4343 
4344 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4345 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4346 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4347 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4348 	} else {
4349 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4350 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4351 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4352 	}
4353 
4354 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4355 }
4356 
4357 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4358 						bool enable)
4359 {
4360 	/* TODO: double check if we need to perform under safe mode */
4361 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4362 
4363 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4364 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4365 	else
4366 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4367 
4368 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4369 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4370 	else
4371 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4372 
4373 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4374 }
4375 
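/*
 * Medium grain clock gating (MGCG/MGLS): under RLC safe mode, either clear
 * the relevant RLC_CGTT_MGCG_OVERRIDE bits and enable RLC/CP memory light
 * sleep (where the corresponding cg_flags are set), or set the override
 * bits again and turn the light sleep enables back off.
 */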
4376 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4377 						      bool enable)
4378 {
4379 	uint32_t data, def;
4380 
4381 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4382 
4383 	/* It is disabled by HW by default */
4384 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4385 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4386 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4387 
4388 		if (adev->asic_type != CHIP_VEGA12)
4389 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4390 
4391 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4392 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4393 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4394 
4395 		/* only for Vega10 & Raven1 */
4396 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4397 
4398 		if (def != data)
4399 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4400 
4401 		/* MGLS is a global flag to control all MGLS in GFX */
4402 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4403 			/* 2 - RLC memory Light sleep */
4404 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4405 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4406 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4407 				if (def != data)
4408 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4409 			}
4410 			/* 3 - CP memory Light sleep */
4411 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4412 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4413 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4414 				if (def != data)
4415 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4416 			}
4417 		}
4418 	} else {
4419 		/* 1 - MGCG_OVERRIDE */
4420 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4421 
4422 		if (adev->asic_type != CHIP_VEGA12)
4423 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4424 
4425 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4426 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4427 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4428 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4429 
4430 		if (def != data)
4431 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4432 
4433 		/* 2 - disable MGLS in RLC */
4434 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4435 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4436 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4437 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4438 		}
4439 
4440 		/* 3 - disable MGLS in CP */
4441 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4442 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4443 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4444 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4445 		}
4446 	}
4447 
4448 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4449 }
4450 
4451 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4452 					   bool enable)
4453 {
4454 	uint32_t data, def;
4455 
4456 	if (adev->asic_type == CHIP_ARCTURUS)
4457 		return;
4458 
4459 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4460 
4461 	/* Enable 3D CGCG/CGLS */
4462 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4463 		/* write cmd to clear cgcg/cgls ov */
4464 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4465 		/* unset CGCG override */
4466 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4467 		/* update CGCG and CGLS override bits */
4468 		if (def != data)
4469 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4470 
4471 		/* enable 3Dcgcg FSM(0x0000363f) */
4472 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4473 
4474 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4475 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4476 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4477 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4478 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4479 		if (def != data)
4480 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4481 
4482 		/* set IDLE_POLL_COUNT(0x00900100) */
4483 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4484 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4485 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4486 		if (def != data)
4487 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4488 	} else {
4489 		/* Disable CGCG/CGLS */
4490 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4491 		/* disable cgcg, cgls should be disabled */
4492 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4493 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4494 		/* disable cgcg and cgls in FSM */
4495 		if (def != data)
4496 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4497 	}
4498 
4499 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4500 }
4501 
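/*
 * Coarse grain clock gating (CGCG/CGLS): under RLC safe mode, clear or set
 * the CGCG/CGLS override bits, program the CGCG FSM idle threshold (larger
 * on Arcturus) and the CP_RB_WPTR_POLL_CNTL idle poll count when enabling,
 * or clear the CGCG/CGLS enable bits in the FSM when disabling.
 */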
4502 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4503 						      bool enable)
4504 {
4505 	uint32_t def, data;
4506 
4507 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4508 
4509 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4510 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4511 		/* unset CGCG override */
4512 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4513 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4514 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4515 		else
4516 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4517 		/* update CGCG and CGLS override bits */
4518 		if (def != data)
4519 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4520 
4521 		/* enable cgcg FSM(0x0000363F) */
4522 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4523 
4524 		if (adev->asic_type == CHIP_ARCTURUS)
4525 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4526 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4527 		else
4528 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4529 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4530 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4531 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4532 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4533 		if (def != data)
4534 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4535 
4536 		/* set IDLE_POLL_COUNT(0x00900100) */
4537 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4538 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4539 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4540 		if (def != data)
4541 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4542 	} else {
4543 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4544 		/* reset CGCG/CGLS bits */
4545 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4546 		/* disable cgcg and cgls in FSM */
4547 		if (def != data)
4548 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4549 	}
4550 
4551 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4552 }
4553 
4554 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4555 					    bool enable)
4556 {
4557 	if (enable) {
4558 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4559 		 * ===  MGCG + MGLS ===
4560 		 */
4561 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4562 		/* ===  CGCG /CGLS for GFX 3D Only === */
4563 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4564 		/* ===  CGCG + CGLS === */
4565 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4566 	} else {
4567 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4568 		 * ===  CGCG + CGLS ===
4569 		 */
4570 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4571 		/* ===  CGCG /CGLS for GFX 3D Only === */
4572 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4573 		/* ===  MGCG + MGLS === */
4574 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4575 	}
4576 	return 0;
4577 }
4578 
4579 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4580 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4581 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4582 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4583 	.init = gfx_v9_0_rlc_init,
4584 	.get_csb_size = gfx_v9_0_get_csb_size,
4585 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4586 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4587 	.resume = gfx_v9_0_rlc_resume,
4588 	.stop = gfx_v9_0_rlc_stop,
4589 	.reset = gfx_v9_0_rlc_reset,
4590 	.start = gfx_v9_0_rlc_start
4591 };
4592 
4593 static int gfx_v9_0_set_powergating_state(void *handle,
4594 					  enum amd_powergating_state state)
4595 {
4596 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4597 	bool enable = (state == AMD_PG_STATE_GATE);
4598 
4599 	switch (adev->asic_type) {
4600 	case CHIP_RAVEN:
4601 	case CHIP_RENOIR:
4602 		if (!enable) {
4603 			amdgpu_gfx_off_ctrl(adev, false);
4604 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4605 		}
4606 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4607 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4608 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4609 		} else {
4610 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4611 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4612 		}
4613 
4614 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4615 			gfx_v9_0_enable_cp_power_gating(adev, true);
4616 		else
4617 			gfx_v9_0_enable_cp_power_gating(adev, false);
4618 
4619 		/* update gfx cgpg state */
4620 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4621 
4622 		/* update mgcg state */
4623 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4624 
4625 		if (enable)
4626 			amdgpu_gfx_off_ctrl(adev, true);
4627 		break;
4628 	case CHIP_VEGA12:
4629 		if (!enable) {
4630 			amdgpu_gfx_off_ctrl(adev, false);
4631 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4632 		} else {
4633 			amdgpu_gfx_off_ctrl(adev, true);
4634 		}
4635 		break;
4636 	default:
4637 		break;
4638 	}
4639 
4640 	return 0;
4641 }
4642 
4643 static int gfx_v9_0_set_clockgating_state(void *handle,
4644 					  enum amd_clockgating_state state)
4645 {
4646 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4647 
4648 	if (amdgpu_sriov_vf(adev))
4649 		return 0;
4650 
4651 	switch (adev->asic_type) {
4652 	case CHIP_VEGA10:
4653 	case CHIP_VEGA12:
4654 	case CHIP_VEGA20:
4655 	case CHIP_RAVEN:
4656 	case CHIP_ARCTURUS:
4657 	case CHIP_RENOIR:
4658 		gfx_v9_0_update_gfx_clock_gating(adev,
4659 						 state == AMD_CG_STATE_GATE);
4660 		break;
4661 	default:
4662 		break;
4663 	}
4664 	return 0;
4665 }
4666 
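/*
 * Report the clock gating features currently in effect by reading the
 * override/enable bits back from the hardware; the 3D CGCG/CGLS registers
 * are only read on non-Arcturus parts.
 */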
4667 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4668 {
4669 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4670 	int data;
4671 
4672 	if (amdgpu_sriov_vf(adev))
4673 		*flags = 0;
4674 
4675 	/* AMD_CG_SUPPORT_GFX_MGCG */
4676 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4677 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4678 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4679 
4680 	/* AMD_CG_SUPPORT_GFX_CGCG */
4681 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4682 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4683 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4684 
4685 	/* AMD_CG_SUPPORT_GFX_CGLS */
4686 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4687 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4688 
4689 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4690 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4691 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4692 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4693 
4694 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4695 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4696 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4697 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4698 
4699 	if (adev->asic_type != CHIP_ARCTURUS) {
4700 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4701 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4702 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4703 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4704 
4705 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4706 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4707 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4708 	}
4709 }
4710 
4711 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4712 {
4713 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4714 }
4715 
4716 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4717 {
4718 	struct amdgpu_device *adev = ring->adev;
4719 	u64 wptr;
4720 
4721 	/* XXX check if swapping is necessary on BE */
4722 	if (ring->use_doorbell) {
4723 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4724 	} else {
4725 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4726 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4727 	}
4728 
4729 	return wptr;
4730 }
4731 
4732 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4733 {
4734 	struct amdgpu_device *adev = ring->adev;
4735 
4736 	if (ring->use_doorbell) {
4737 		/* XXX check if swapping is necessary on BE */
4738 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4739 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4740 	} else {
4741 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4742 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4743 	}
4744 }
4745 
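/*
 * Emit an HDP flush: pick the NBIO reference/mask bits for this ring
 * (per-pipe bits for compute on MEC1/MEC2, the CP0 bit for gfx) and use a
 * WAIT_REG_MEM on the NBIO HDP flush request/done registers to trigger the
 * flush and wait for it to complete.
 */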
4746 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4747 {
4748 	struct amdgpu_device *adev = ring->adev;
4749 	u32 ref_and_mask, reg_mem_engine;
4750 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4751 
4752 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4753 		switch (ring->me) {
4754 		case 1:
4755 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4756 			break;
4757 		case 2:
4758 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4759 			break;
4760 		default:
4761 			return;
4762 		}
4763 		reg_mem_engine = 0;
4764 	} else {
4765 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4766 		reg_mem_engine = 1; /* pfp */
4767 	}
4768 
4769 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4770 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4771 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4772 			      ref_and_mask, ref_and_mask, 0x20);
4773 }
4774 
4775 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4776 					struct amdgpu_job *job,
4777 					struct amdgpu_ib *ib,
4778 					uint32_t flags)
4779 {
4780 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4781 	u32 header, control = 0;
4782 
4783 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4784 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4785 	else
4786 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4787 
4788 	control |= ib->length_dw | (vmid << 24);
4789 
4790 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4791 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4792 
4793 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4794 			gfx_v9_0_ring_emit_de_meta(ring);
4795 	}
4796 
4797 	amdgpu_ring_write(ring, header);
4798 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4799 	amdgpu_ring_write(ring,
4800 #ifdef __BIG_ENDIAN
4801 		(2 << 0) |
4802 #endif
4803 		lower_32_bits(ib->gpu_addr));
4804 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4805 	amdgpu_ring_write(ring, control);
4806 }
4807 
4808 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4809 					  struct amdgpu_job *job,
4810 					  struct amdgpu_ib *ib,
4811 					  uint32_t flags)
4812 {
4813 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4814 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4815 
4816 	/* Currently, there is a high possibility to get wave ID mismatch
4817 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4818 	 * different wave IDs than the GDS expects. This situation happens
4819 	 * randomly when at least 5 compute pipes use GDS ordered append.
4820 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4821 	 * Those are probably bugs somewhere else in the kernel driver.
4822 	 *
4823 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4824 	 * GDS to 0 for this ring (me/pipe).
4825 	 */
4826 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4827 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4828 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4829 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4830 	}
4831 
4832 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4833 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4834 	amdgpu_ring_write(ring,
4835 #ifdef __BIG_ENDIAN
4836 				(2 << 0) |
4837 #endif
4838 				lower_32_bits(ib->gpu_addr));
4839 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4840 	amdgpu_ring_write(ring, control);
4841 }
4842 
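/*
 * Emit a gfx/compute fence via RELEASE_MEM: flush/invalidate the caches
 * (TC write-back/NC only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), then
 * write the 32- or 64-bit sequence number to "addr" and optionally raise
 * an interrupt, as selected by the fence flags.
 */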
4843 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4844 				     u64 seq, unsigned flags)
4845 {
4846 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4847 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4848 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4849 
4850 	/* RELEASE_MEM - flush caches, send int */
4851 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4852 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4853 					       EOP_TC_NC_ACTION_EN) :
4854 					      (EOP_TCL1_ACTION_EN |
4855 					       EOP_TC_ACTION_EN |
4856 					       EOP_TC_WB_ACTION_EN |
4857 					       EOP_TC_MD_ACTION_EN)) |
4858 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4859 				 EVENT_INDEX(5)));
4860 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4861 
4862 	/*
4863 	 * the address should be Qword aligned for a 64bit write and Dword
4864 	 * aligned when only the low 32 bits of data are sent (data high is discarded)
4865 	 */
4866 	if (write64bit)
4867 		BUG_ON(addr & 0x7);
4868 	else
4869 		BUG_ON(addr & 0x3);
4870 	amdgpu_ring_write(ring, lower_32_bits(addr));
4871 	amdgpu_ring_write(ring, upper_32_bits(addr));
4872 	amdgpu_ring_write(ring, lower_32_bits(seq));
4873 	amdgpu_ring_write(ring, upper_32_bits(seq));
4874 	amdgpu_ring_write(ring, 0);
4875 }
4876 
4877 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4878 {
4879 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4880 	uint32_t seq = ring->fence_drv.sync_seq;
4881 	uint64_t addr = ring->fence_drv.gpu_addr;
4882 
4883 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4884 			      lower_32_bits(addr), upper_32_bits(addr),
4885 			      seq, 0xffffffff, 4);
4886 }
4887 
4888 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4889 					unsigned vmid, uint64_t pd_addr)
4890 {
4891 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4892 
4893 	/* compute doesn't have PFP */
4894 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4895 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4896 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4897 		amdgpu_ring_write(ring, 0x0);
4898 	}
4899 }
4900 
4901 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4902 {
4903 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4904 }
4905 
4906 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4907 {
4908 	u64 wptr;
4909 
4910 	/* XXX check if swapping is necessary on BE */
4911 	if (ring->use_doorbell)
4912 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4913 	else
4914 		BUG();
4915 	return wptr;
4916 }
4917 
4918 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4919 					   bool acquire)
4920 {
4921 	struct amdgpu_device *adev = ring->adev;
4922 	int pipe_num, tmp, reg;
4923 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4924 
4925 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4926 
4927 	/* first me only has 2 entries, GFX and HP3D */
4928 	if (ring->me > 0)
4929 		pipe_num -= 2;
4930 
4931 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4932 	tmp = RREG32(reg);
4933 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4934 	WREG32(reg, tmp);
4935 }
4936 
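/*
 * Track high-priority pipe reservations in pipe_reserve_bitmap and adjust
 * SPI_WCL_PIPE_PERCENT for every gfx/compute ring: when no reservations
 * remain, all pipes get full priority again; otherwise pipes without a
 * current reservation are throttled down.
 */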
4937 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4938 					    struct amdgpu_ring *ring,
4939 					    bool acquire)
4940 {
4941 	int i, pipe;
4942 	bool reserve;
4943 	struct amdgpu_ring *iring;
4944 
4945 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4946 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4947 	if (acquire)
4948 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4949 	else
4950 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4951 
4952 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4953 		/* Clear all reservations - everyone reacquires all resources */
4954 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4955 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4956 						       true);
4957 
4958 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4959 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4960 						       true);
4961 	} else {
4962 		/* Lower all pipes without a current reservation */
4963 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4964 			iring = &adev->gfx.gfx_ring[i];
4965 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4966 							   iring->me,
4967 							   iring->pipe,
4968 							   0);
4969 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4970 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4971 		}
4972 
4973 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4974 			iring = &adev->gfx.compute_ring[i];
4975 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4976 							   iring->me,
4977 							   iring->pipe,
4978 							   0);
4979 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4980 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4981 		}
4982 	}
4983 
4984 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4985 }
4986 
4987 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4988 				      struct amdgpu_ring *ring,
4989 				      bool acquire)
4990 {
4991 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4992 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4993 
4994 	mutex_lock(&adev->srbm_mutex);
4995 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4996 
4997 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4998 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4999 
5000 	soc15_grbm_select(adev, 0, 0, 0, 0);
5001 	mutex_unlock(&adev->srbm_mutex);
5002 }
5003 
5004 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5005 					       enum drm_sched_priority priority)
5006 {
5007 	struct amdgpu_device *adev = ring->adev;
5008 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5009 
5010 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5011 		return;
5012 
5013 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5014 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5015 }
5016 
5017 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5018 {
5019 	struct amdgpu_device *adev = ring->adev;
5020 
5021 	/* XXX check if swapping is necessary on BE */
5022 	if (ring->use_doorbell) {
5023 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5024 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5025 	} else {
5026 		BUG(); /* only DOORBELL method supported on gfx9 now */
5027 	}
5028 }
5029 
5030 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5031 					 u64 seq, unsigned int flags)
5032 {
5033 	struct amdgpu_device *adev = ring->adev;
5034 
5035 	/* we only allocate 32bit for each seq wb address */
5036 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5037 
5038 	/* write fence seq to the "addr" */
5039 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5040 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5041 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5042 	amdgpu_ring_write(ring, lower_32_bits(addr));
5043 	amdgpu_ring_write(ring, upper_32_bits(addr));
5044 	amdgpu_ring_write(ring, lower_32_bits(seq));
5045 
5046 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5047 		/* set register to trigger INT */
5048 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5049 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5050 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5051 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5052 		amdgpu_ring_write(ring, 0);
5053 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5054 	}
5055 }
5056 
5057 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5058 {
5059 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5060 	amdgpu_ring_write(ring, 0);
5061 }
5062 
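/*
 * Write zeroed CE/DE metadata into the per-context CSA via WRITE_DATA
 * packets; the DE payload additionally records a GDS backup address
 * (CSA + 4096).  Both helpers are only emitted under SR-IOV (see
 * gfx_v9_ring_emit_cntxcntl and gfx_v9_0_ring_emit_ib_gfx).
 */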
5063 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5064 {
5065 	struct v9_ce_ib_state ce_payload = {0};
5066 	uint64_t csa_addr;
5067 	int cnt;
5068 
5069 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5070 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5071 
5072 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5073 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5074 				 WRITE_DATA_DST_SEL(8) |
5075 				 WR_CONFIRM) |
5076 				 WRITE_DATA_CACHE_POLICY(0));
5077 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5078 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5079 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5080 }
5081 
5082 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5083 {
5084 	struct v9_de_ib_state de_payload = {0};
5085 	uint64_t csa_addr, gds_addr;
5086 	int cnt;
5087 
5088 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5089 	gds_addr = csa_addr + 4096;
5090 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5091 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5092 
5093 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5094 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5095 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5096 				 WRITE_DATA_DST_SEL(8) |
5097 				 WR_CONFIRM) |
5098 				 WRITE_DATA_CACHE_POLICY(0));
5099 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5100 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5101 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5102 }
5103 
5104 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5105 {
5106 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5107 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5108 }
5109 
5110 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5111 {
5112 	uint32_t dw2 = 0;
5113 
5114 	if (amdgpu_sriov_vf(ring->adev))
5115 		gfx_v9_0_ring_emit_ce_meta(ring);
5116 
5117 	gfx_v9_0_ring_emit_tmz(ring, true);
5118 
5119 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5120 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5121 		/* set load_global_config & load_global_uconfig */
5122 		dw2 |= 0x8001;
5123 		/* set load_cs_sh_regs */
5124 		dw2 |= 0x01000000;
5125 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5126 		dw2 |= 0x10002;
5127 
5128 		/* set load_ce_ram if preamble presented */
5129 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5130 			dw2 |= 0x10000000;
5131 	} else {
5132 		/* still load_ce_ram if this is the first time the preamble is presented,
5133 		 * even though no context switch happens.
5134 		 */
5135 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5136 			dw2 |= 0x10000000;
5137 	}
5138 
5139 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5140 	amdgpu_ring_write(ring, dw2);
5141 	amdgpu_ring_write(ring, 0);
5142 }
5143 
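/*
 * Conditional execution: emit a COND_EXEC packet whose DW count is patched
 * in later.  init_cond_exec writes a 0x55aa55aa placeholder and returns its
 * ring offset; patch_cond_exec computes how many DWs were emitted since
 * then (handling ring wrap-around) and overwrites the placeholder.
 */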
5144 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5145 {
5146 	unsigned ret;
5147 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5148 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5149 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5150 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5151 	ret = ring->wptr & ring->buf_mask;
5152 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5153 	return ret;
5154 }
5155 
5156 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5157 {
5158 	unsigned cur;
5159 	BUG_ON(offset > ring->buf_mask);
5160 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5161 
5162 	cur = (ring->wptr & ring->buf_mask) - 1;
5163 	if (likely(cur > offset))
5164 		ring->ring[offset] = cur - offset;
5165 	else
5166 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5167 }
5168 
5169 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5170 {
5171 	struct amdgpu_device *adev = ring->adev;
5172 
5173 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5174 	amdgpu_ring_write(ring, 0 |	/* src: register */
5175 				(5 << 8) |	/* dst: memory */
5176 				(1 << 20));	/* write confirm */
5177 	amdgpu_ring_write(ring, reg);
5178 	amdgpu_ring_write(ring, 0);
5179 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5180 				adev->virt.reg_val_offs * 4));
5181 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5182 				adev->virt.reg_val_offs * 4));
5183 }
5184 
5185 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5186 				    uint32_t val)
5187 {
5188 	uint32_t cmd = 0;
5189 
5190 	switch (ring->funcs->type) {
5191 	case AMDGPU_RING_TYPE_GFX:
5192 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5193 		break;
5194 	case AMDGPU_RING_TYPE_KIQ:
5195 		cmd = (1 << 16); /* no inc addr */
5196 		break;
5197 	default:
5198 		cmd = WR_CONFIRM;
5199 		break;
5200 	}
5201 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5202 	amdgpu_ring_write(ring, cmd);
5203 	amdgpu_ring_write(ring, reg);
5204 	amdgpu_ring_write(ring, 0);
5205 	amdgpu_ring_write(ring, val);
5206 }
5207 
5208 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5209 					uint32_t val, uint32_t mask)
5210 {
5211 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5212 }
5213 
5214 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5215 						  uint32_t reg0, uint32_t reg1,
5216 						  uint32_t ref, uint32_t mask)
5217 {
5218 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5219 	struct amdgpu_device *adev = ring->adev;
5220 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5221 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5222 
5223 	if (fw_version_ok)
5224 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5225 				      ref, mask, 0x20);
5226 	else
5227 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5228 							   ref, mask);
5229 }
5230 
5231 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5232 {
5233 	struct amdgpu_device *adev = ring->adev;
5234 	uint32_t value = 0;
5235 
5236 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5237 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5238 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5239 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5240 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5241 }
5242 
5243 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5244 						 enum amdgpu_interrupt_state state)
5245 {
5246 	switch (state) {
5247 	case AMDGPU_IRQ_STATE_DISABLE:
5248 	case AMDGPU_IRQ_STATE_ENABLE:
5249 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5250 			       TIME_STAMP_INT_ENABLE,
5251 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5252 		break;
5253 	default:
5254 		break;
5255 	}
5256 }
5257 
5258 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5259 						     int me, int pipe,
5260 						     enum amdgpu_interrupt_state state)
5261 {
5262 	u32 mec_int_cntl, mec_int_cntl_reg;
5263 
5264 	/*
5265 	 * amdgpu controls only the first MEC. That's why this function only
5266 	 * handles the setting of interrupts for this specific MEC. All other
5267 	 * pipes' interrupts are set by amdkfd.
5268 	 */
5269 
5270 	if (me == 1) {
5271 		switch (pipe) {
5272 		case 0:
5273 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5274 			break;
5275 		case 1:
5276 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5277 			break;
5278 		case 2:
5279 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5280 			break;
5281 		case 3:
5282 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5283 			break;
5284 		default:
5285 			DRM_DEBUG("invalid pipe %d\n", pipe);
5286 			return;
5287 		}
5288 	} else {
5289 		DRM_DEBUG("invalid me %d\n", me);
5290 		return;
5291 	}
5292 
5293 	switch (state) {
5294 	case AMDGPU_IRQ_STATE_DISABLE:
5295 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5296 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5297 					     TIME_STAMP_INT_ENABLE, 0);
5298 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5299 		break;
5300 	case AMDGPU_IRQ_STATE_ENABLE:
5301 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5302 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5303 					     TIME_STAMP_INT_ENABLE, 1);
5304 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5305 		break;
5306 	default:
5307 		break;
5308 	}
5309 }
5310 
5311 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5312 					     struct amdgpu_irq_src *source,
5313 					     unsigned type,
5314 					     enum amdgpu_interrupt_state state)
5315 {
5316 	switch (state) {
5317 	case AMDGPU_IRQ_STATE_DISABLE:
5318 	case AMDGPU_IRQ_STATE_ENABLE:
5319 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5320 			       PRIV_REG_INT_ENABLE,
5321 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5322 		break;
5323 	default:
5324 		break;
5325 	}
5326 
5327 	return 0;
5328 }
5329 
5330 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5331 					      struct amdgpu_irq_src *source,
5332 					      unsigned type,
5333 					      enum amdgpu_interrupt_state state)
5334 {
5335 	switch (state) {
5336 	case AMDGPU_IRQ_STATE_DISABLE:
5337 	case AMDGPU_IRQ_STATE_ENABLE:
5338 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5339 			       PRIV_INSTR_INT_ENABLE,
5340 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5341 	default:
5342 		break;
5343 	}
5344 
5345 	return 0;
5346 }
5347 
5348 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5349 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5350 			CP_ECC_ERROR_INT_ENABLE, 1)
5351 
5352 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5353 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5354 			CP_ECC_ERROR_INT_ENABLE, 0)
5355 
5356 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5357 					      struct amdgpu_irq_src *source,
5358 					      unsigned type,
5359 					      enum amdgpu_interrupt_state state)
5360 {
5361 	switch (state) {
5362 	case AMDGPU_IRQ_STATE_DISABLE:
5363 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5364 				CP_ECC_ERROR_INT_ENABLE, 0);
5365 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5366 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5367 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5368 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5369 		break;
5370 
5371 	case AMDGPU_IRQ_STATE_ENABLE:
5372 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5373 				CP_ECC_ERROR_INT_ENABLE, 1);
5374 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5375 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5376 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5377 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5378 		break;
5379 	default:
5380 		break;
5381 	}
5382 
5383 	return 0;
5384 }
5385 
5386 
5387 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5388 					    struct amdgpu_irq_src *src,
5389 					    unsigned type,
5390 					    enum amdgpu_interrupt_state state)
5391 {
5392 	switch (type) {
5393 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5394 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5395 		break;
5396 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5397 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5398 		break;
5399 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5400 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5401 		break;
5402 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5403 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5404 		break;
5405 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5406 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5407 		break;
5408 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5409 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5410 		break;
5411 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5412 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5413 		break;
5414 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5415 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5416 		break;
5417 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5418 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5419 		break;
5420 	default:
5421 		break;
5422 	}
5423 	return 0;
5424 }
5425 
5426 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5427 			    struct amdgpu_irq_src *source,
5428 			    struct amdgpu_iv_entry *entry)
5429 {
5430 	int i;
5431 	u8 me_id, pipe_id, queue_id;
5432 	struct amdgpu_ring *ring;
5433 
5434 	DRM_DEBUG("IH: CP EOP\n");
5435 	me_id = (entry->ring_id & 0x0c) >> 2;
5436 	pipe_id = (entry->ring_id & 0x03) >> 0;
5437 	queue_id = (entry->ring_id & 0x70) >> 4;
5438 
5439 	switch (me_id) {
5440 	case 0:
5441 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5442 		break;
5443 	case 1:
5444 	case 2:
5445 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5446 			ring = &adev->gfx.compute_ring[i];
5447 			/* Per-queue interrupt is supported for MEC starting from VI.
5448 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5449 			 */
5450 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5451 				amdgpu_fence_process(ring);
5452 		}
5453 		break;
5454 	}
5455 	return 0;
5456 }
5457 
5458 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5459 			   struct amdgpu_iv_entry *entry)
5460 {
5461 	u8 me_id, pipe_id, queue_id;
5462 	struct amdgpu_ring *ring;
5463 	int i;
5464 
5465 	me_id = (entry->ring_id & 0x0c) >> 2;
5466 	pipe_id = (entry->ring_id & 0x03) >> 0;
5467 	queue_id = (entry->ring_id & 0x70) >> 4;
5468 
5469 	switch (me_id) {
5470 	case 0:
5471 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5472 		break;
5473 	case 1:
5474 	case 2:
5475 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5476 			ring = &adev->gfx.compute_ring[i];
5477 			if (ring->me == me_id && ring->pipe == pipe_id &&
5478 			    ring->queue == queue_id)
5479 				drm_sched_fault(&ring->sched);
5480 		}
5481 		break;
5482 	}
5483 }
5484 
5485 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5486 				 struct amdgpu_irq_src *source,
5487 				 struct amdgpu_iv_entry *entry)
5488 {
5489 	DRM_ERROR("Illegal register access in command stream\n");
5490 	gfx_v9_0_fault(adev, entry);
5491 	return 0;
5492 }
5493 
5494 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5495 				  struct amdgpu_irq_src *source,
5496 				  struct amdgpu_iv_entry *entry)
5497 {
5498 	DRM_ERROR("Illegal instruction in command stream\n");
5499 	gfx_v9_0_fault(adev, entry);
5500 	return 0;
5501 }
5502 
5503 
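/*
 * Vega20 RAS/EDC error counter map: each entry names a GC sub-block memory,
 * the register that holds its error counters, and the SEC and DED bit
 * fields within it (zero where a block only reports a single SED count).
 */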
5504 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5505 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5506 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5507 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5508 	},
5509 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5510 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5511 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5512 	},
5513 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5514 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5515 	  0, 0
5516 	},
5517 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5518 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5519 	  0, 0
5520 	},
5521 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5522 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5523 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5524 	},
5525 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5526 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5527 	  0, 0
5528 	},
5529 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5530 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5531 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5532 	},
5533 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5534 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5535 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5536 	},
5537 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5538 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5539 	  0, 0
5540 	},
5541 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5542 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5543 	  0, 0
5544 	},
5545 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5546 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5547 	  0, 0
5548 	},
5549 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5550 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5551 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5552 	},
5553 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5554 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5555 	  0, 0
5556 	},
5557 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5558 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5559 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5560 	},
5561 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5562 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5563 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5564 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5565 	},
5566 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5567 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5568 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5569 	  0, 0
5570 	},
5571 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5572 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5573 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5574 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5575 	},
5576 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5577 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5578 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5579 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5580 	},
5581 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5582 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5583 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5584 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5585 	},
5586 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5587 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5588 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5589 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5590 	},
5591 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5592 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5593 	  0, 0
5594 	},
5595 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5596 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5597 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5598 	},
5599 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5600 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5601 	  0, 0
5602 	},
5603 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5604 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5605 	  0, 0
5606 	},
5607 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5608 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5609 	  0, 0
5610 	},
5611 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5612 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5613 	  0, 0
5614 	},
5615 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5616 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5617 	  0, 0
5618 	},
5619 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5620 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5621 	  0, 0
5622 	},
5623 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5624 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5625 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5626 	},
5627 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5628 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5629 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5630 	},
5631 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5632 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5633 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5634 	},
5635 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5636 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5637 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5638 	},
5639 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5640 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5641 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5642 	},
5643 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5644 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5645 	  0, 0
5646 	},
5647 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5648 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5649 	  0, 0
5650 	},
5651 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5652 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5653 	  0, 0
5654 	},
5655 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5656 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5657 	  0, 0
5658 	},
5659 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5660 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5661 	  0, 0
5662 	},
5663 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5664 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5665 	  0, 0
5666 	},
5667 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5668 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5669 	  0, 0
5670 	},
5671 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5672 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5673 	  0, 0
5674 	},
5675 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5676 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5677 	  0, 0
5678 	},
5679 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5680 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5681 	  0, 0
5682 	},
5683 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5684 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5685 	  0, 0
5686 	},
5687 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5688 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5689 	  0, 0
5690 	},
5691 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5692 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5693 	  0, 0
5694 	},
5695 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5696 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5697 	  0, 0
5698 	},
5699 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5700 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5701 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5702 	},
5703 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5704 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5705 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5706 	},
5707 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5708 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5709 	  0, 0
5710 	},
5711 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5712 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5713 	  0, 0
5714 	},
5715 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5716 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5717 	  0, 0
5718 	},
5719 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5720 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5721 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5722 	},
5723 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5724 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5725 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5726 	},
5727 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5728 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5729 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5730 	},
5731 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5732 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5733 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5734 	},
5735 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5736 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5737 	  0, 0
5738 	},
5739 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5740 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5741 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5742 	},
5743 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5744 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5745 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5746 	},
5747 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5748 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5749 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5750 	},
5751 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5752 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5753 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5754 	},
5755 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5756 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5757 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5758 	},
5759 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5760 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5761 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5762 	},
5763 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5764 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5765 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5766 	},
5767 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5768 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5769 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5770 	},
5771 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5772 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5773 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5774 	},
5775 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5776 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5777 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5778 	},
5779 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5780 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5781 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5782 	},
5783 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5784 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5785 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5786 	},
5787 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5788 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5789 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5790 	},
5791 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5792 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5793 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5794 	},
5795 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5796 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5797 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5798 	},
5799 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5800 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5801 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5802 	},
5803 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5804 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5805 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5806 	},
5807 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5808 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5809 	  0, 0
5810 	},
5811 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5812 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5813 	  0, 0
5814 	},
5815 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5816 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5817 	  0, 0
5818 	},
5819 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5820 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5821 	  0, 0
5822 	},
5823 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5824 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5825 	  0, 0
5826 	},
5827 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5828 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5829 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5830 	},
5831 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5832 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5833 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5834 	},
5835 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5836 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5837 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5838 	},
5839 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5840 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5841 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5842 	},
5843 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5844 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5845 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5846 	},
5847 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5848 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5849 	  0, 0
5850 	},
5851 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5852 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5853 	  0, 0
5854 	},
5855 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5856 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5857 	  0, 0
5858 	},
5859 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5860 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5861 	  0, 0
5862 	},
5863 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5864 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5865 	  0, 0
5866 	},
5867 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5868 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5869 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5870 	},
5871 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5872 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5873 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5874 	},
5875 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5876 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5877 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5878 	},
5879 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5880 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5881 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5882 	},
5883 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5884 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5885 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5886 	},
5887 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5888 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5889 	  0, 0
5890 	},
5891 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5892 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5893 	  0, 0
5894 	},
5895 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5896 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5897 	  0, 0
5898 	},
5899 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5900 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5901 	  0, 0
5902 	},
5903 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5904 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5905 	  0, 0
5906 	},
5907 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5908 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5909 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5910 	},
5911 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5912 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5913 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5914 	},
5915 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5916 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5917 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5918 	},
5919 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5920 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5921 	  0, 0
5922 	},
5923 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5924 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5925 	  0, 0
5926 	},
5927 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5928 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5929 	  0, 0
5930 	},
5931 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5932 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5933 	  0, 0
5934 	},
5935 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5936 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5937 	  0, 0
5938 	},
5939 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5940 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5941 	  0, 0
5942 	}
5943 };
5944 
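/* Inject a RAS error described by @inject_if (a struct ras_inject_if) via the
 * PSP.  Only CHIP_VEGA20 is handled here, and the requested sub-block and
 * error type must be listed as supported in ras_gfx_subblocks[].
 */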
5945 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5946 				     void *inject_if)
5947 {
5948 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5949 	int ret;
5950 	struct ta_ras_trigger_error_input block_info = { 0 };
5951 
5952 	if (adev->asic_type != CHIP_VEGA20)
5953 		return -EINVAL;
5954 
5955 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5956 		return -EINVAL;
5957 
5958 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5959 		return -EPERM;
5960 
5961 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5962 	      info->head.type)) {
5963 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5964 			ras_gfx_subblocks[info->head.sub_block_index].name,
5965 			info->head.type);
5966 		return -EPERM;
5967 	}
5968 
5969 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5970 	      info->head.type)) {
5971 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5972 			ras_gfx_subblocks[info->head.sub_block_index].name,
5973 			info->head.type);
5974 		return -EPERM;
5975 	}
5976 
5977 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5978 	block_info.sub_block_index =
5979 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5980 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5981 	block_info.address = info->address;
5982 	block_info.value = info->value;
5983 
5984 	mutex_lock(&adev->grbm_idx_mutex);
5985 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
5986 	mutex_unlock(&adev->grbm_idx_mutex);
5987 
5988 	return ret;
5989 }
5990 
5991 static const char *vml2_mems[] = {
5992 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
5993 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
5994 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
5995 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
5996 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
5997 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
5998 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
5999 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6000 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6001 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6002 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6003 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6004 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6005 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6006 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6007 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6008 };
6009 
6010 static const char *vml2_walker_mems[] = {
6011 	"UTC_VML2_CACHE_PDE0_MEM0",
6012 	"UTC_VML2_CACHE_PDE0_MEM1",
6013 	"UTC_VML2_CACHE_PDE1_MEM0",
6014 	"UTC_VML2_CACHE_PDE1_MEM1",
6015 	"UTC_VML2_CACHE_PDE2_MEM0",
6016 	"UTC_VML2_CACHE_PDE2_MEM1",
6017 	"UTC_VML2_RDIF_LOG_FIFO",
6018 };
6019 
6020 static const char *atc_l2_cache_2m_mems[] = {
6021 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6022 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6023 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6024 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6025 };
6026 
6027 static const char *atc_l2_cache_4k_mems[] = {
6028 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6029 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6030 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6031 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6032 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6033 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6034 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6035 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6036 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6037 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6038 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6039 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6040 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6041 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6042 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6043 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6044 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6045 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6046 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6047 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6048 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6049 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6050 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6051 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6052 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6053 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6054 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6055 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6056 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6057 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6058 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6059 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6060 };
6061 
6062 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6063 					 struct ras_err_data *err_data)
6064 {
6065 	uint32_t i, data;
6066 	uint32_t sec_count, ded_count;
6067 
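	/* Clear the EDC counters before sampling them.  Writing 255 to the
	 * INDEX registers presumably deselects/broadcasts all instances.
	 */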
6068 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6069 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6070 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6071 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6072 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6073 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6074 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6075 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6076 
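	/* Walk the 16 VML2 bank cache instances named in vml2_mems[]. */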
6077 	for (i = 0; i < 16; i++) {
6078 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6079 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6080 
6081 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6082 		if (sec_count) {
6083 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6084 				 vml2_mems[i], sec_count);
6085 			err_data->ce_count += sec_count;
6086 		}
6087 
6088 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6089 		if (ded_count) {
6090 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6091 				 vml2_mems[i], ded_count);
6092 			err_data->ue_count += ded_count;
6093 		}
6094 	}
6095 
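	/* Walk the 7 VML2 walker memories named in vml2_walker_mems[]. */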
6096 	for (i = 0; i < 7; i++) {
6097 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6098 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6099 
6100 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6101 						SEC_COUNT);
6102 		if (sec_count) {
6103 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6104 				 vml2_walker_mems[i], sec_count);
6105 			err_data->ce_count += sec_count;
6106 		}
6107 
6108 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6109 						DED_COUNT);
6110 		if (ded_count) {
6111 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6112 				 vml2_walker_mems[i], ded_count);
6113 			err_data->ue_count += ded_count;
6114 		}
6115 	}
6116 
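	/* The ATC L2 counters are decoded with raw masks below; bits 14:13
	 * appear to hold the SEC count and bits 16:15 the DED count (no
	 * field macros are used for these registers).
	 */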
6117 	for (i = 0; i < 4; i++) {
6118 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6119 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6120 
6121 		sec_count = (data & 0x00006000L) >> 0xd;
6122 		if (sec_count) {
6123 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6124 				 atc_l2_cache_2m_mems[i], sec_count);
6125 			err_data->ce_count += sec_count;
6126 		}
6127 	}
6128 
6129 	for (i = 0; i < 32; i++) {
6130 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6131 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6132 
6133 		sec_count = (data & 0x00006000L) >> 0xd;
6134 		if (sec_count) {
6135 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6136 				 atc_l2_cache_4k_mems[i], sec_count);
6137 			err_data->ce_count += sec_count;
6138 		}
6139 
6140 		ded_count = (data & 0x00018000L) >> 0xf;
6141 		if (ded_count) {
6142 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6143 				 atc_l2_cache_4k_mems[i], ded_count);
6144 			err_data->ue_count += ded_count;
6145 		}
6146 	}
6147 
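	/* Set the index registers back to 255 when done. */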
6148 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6149 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6150 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6151 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6152 
6153 	return 0;
6154 }
6155 
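/* Decode @value read from @reg: scan gc_ras_fields_vg20[] for counter fields
 * that belong to this register and accumulate their SEC/DED counts into
 * *sec_count and *ded_count.
 */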
6156 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6157 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6158 	uint32_t *sec_count, uint32_t *ded_count)
6159 {
6160 	uint32_t i;
6161 	uint32_t sec_cnt, ded_cnt;
6162 
6163 	for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
6164 		if (gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6165 			gc_ras_fields_vg20[i].seg != reg->seg ||
6166 			gc_ras_fields_vg20[i].inst != reg->inst)
6167 			continue;
6168 
6169 		sec_cnt = (value &
6170 				gc_ras_fields_vg20[i].sec_count_mask) >>
6171 				gc_ras_fields_vg20[i].sec_count_shift;
6172 		if (sec_cnt) {
6173 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6174 				gc_ras_fields_vg20[i].name,
6175 				se_id, inst_id,
6176 				sec_cnt);
6177 			*sec_count += sec_cnt;
6178 		}
6179 
6180 		ded_cnt = (value &
6181 				gc_ras_fields_vg20[i].ded_count_mask) >>
6182 				gc_ras_fields_vg20[i].ded_count_shift;
6183 		if (ded_cnt) {
6184 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6185 				gc_ras_fields_vg20[i].name,
6186 				se_id, inst_id,
6187 				ded_cnt);
6188 			*ded_count += ded_cnt;
6189 		}
6190 	}
6191 
6192 	return 0;
6193 }
6194 
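/* Query the accumulated GFX RAS error counts.  Only CHIP_VEGA20 is handled;
 * correctable (SEC) and uncorrectable (DED) totals are returned through
 * @ras_error_status, which is a struct ras_err_data.
 */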
6195 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6196 					  void *ras_error_status)
6197 {
6198 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6199 	uint32_t sec_count = 0, ded_count = 0;
6200 	uint32_t i, j, k;
6201 	uint32_t reg_value;
6202 
6203 	if (adev->asic_type != CHIP_VEGA20)
6204 		return -EINVAL;
6205 
6206 	err_data->ue_count = 0;
6207 	err_data->ce_count = 0;
6208 
6209 	mutex_lock(&adev->grbm_idx_mutex);
6210 
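	/* Read every SEC/DED counter register on each SE and instance and
	 * decode whatever is non-zero.
	 */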
6211 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6212 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6213 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6214 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6215 				reg_value =
6216 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6217 				if (reg_value)
6218 					__get_ras_error_count(&sec_ded_counter_registers[i],
6219 							j, k, reg_value,
6220 							&sec_count, &ded_count);
6221 			}
6222 		}
6223 	}
6224 
6225 	err_data->ce_count += sec_count;
6226 	err_data->ue_count += ded_count;
6227 
6228 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6229 	mutex_unlock(&adev->grbm_idx_mutex);
6230 
6231 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6232 
6233 	return 0;
6234 }
6235 
6236 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6237 	.name = "gfx_v9_0",
6238 	.early_init = gfx_v9_0_early_init,
6239 	.late_init = gfx_v9_0_late_init,
6240 	.sw_init = gfx_v9_0_sw_init,
6241 	.sw_fini = gfx_v9_0_sw_fini,
6242 	.hw_init = gfx_v9_0_hw_init,
6243 	.hw_fini = gfx_v9_0_hw_fini,
6244 	.suspend = gfx_v9_0_suspend,
6245 	.resume = gfx_v9_0_resume,
6246 	.is_idle = gfx_v9_0_is_idle,
6247 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6248 	.soft_reset = gfx_v9_0_soft_reset,
6249 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6250 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6251 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6252 };
6253 
6254 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6255 	.type = AMDGPU_RING_TYPE_GFX,
6256 	.align_mask = 0xff,
6257 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6258 	.support_64bit_ptrs = true,
6259 	.vmhub = AMDGPU_GFXHUB_0,
6260 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6261 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6262 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6263 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6264 		5 +  /* COND_EXEC */
6265 		7 +  /* PIPELINE_SYNC */
6266 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6267 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6268 		2 + /* VM_FLUSH */
6269 		8 +  /* FENCE for VM_FLUSH */
6270 		20 + /* GDS switch */
6271 		4 + /* double SWITCH_BUFFER,
6272 		       the first COND_EXEC jumps to the place just
6273 		       prior to this double SWITCH_BUFFER */
6274 		5 + /* COND_EXEC */
6275 		7 +  /* HDP_flush */
6276 		4 +  /* VGT_flush */
6277 		14 + /* CE_META */
6278 		31 + /* DE_META */
6279 		3 + /* CNTX_CTRL */
6280 		5 + /* HDP_INVL */
6281 		8 + 8 + /* FENCE x2 */
6282 		2, /* SWITCH_BUFFER */
6283 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6284 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6285 	.emit_fence = gfx_v9_0_ring_emit_fence,
6286 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6287 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6288 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6289 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6290 	.test_ring = gfx_v9_0_ring_test_ring,
6291 	.test_ib = gfx_v9_0_ring_test_ib,
6292 	.insert_nop = amdgpu_ring_insert_nop,
6293 	.pad_ib = amdgpu_ring_generic_pad_ib,
6294 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6295 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6296 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6297 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6298 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6299 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6300 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6301 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6302 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6303 };
6304 
6305 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6306 	.type = AMDGPU_RING_TYPE_COMPUTE,
6307 	.align_mask = 0xff,
6308 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6309 	.support_64bit_ptrs = true,
6310 	.vmhub = AMDGPU_GFXHUB_0,
6311 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6312 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6313 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6314 	.emit_frame_size =
6315 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6316 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6317 		5 + /* hdp invalidate */
6318 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6319 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6320 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6321 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6322 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6323 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6324 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6325 	.emit_fence = gfx_v9_0_ring_emit_fence,
6326 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6327 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6328 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6329 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6330 	.test_ring = gfx_v9_0_ring_test_ring,
6331 	.test_ib = gfx_v9_0_ring_test_ib,
6332 	.insert_nop = amdgpu_ring_insert_nop,
6333 	.pad_ib = amdgpu_ring_generic_pad_ib,
6334 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6335 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6336 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6337 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6338 };
6339 
6340 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6341 	.type = AMDGPU_RING_TYPE_KIQ,
6342 	.align_mask = 0xff,
6343 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6344 	.support_64bit_ptrs = true,
6345 	.vmhub = AMDGPU_GFXHUB_0,
6346 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6347 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6348 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6349 	.emit_frame_size =
6350 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6351 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6352 		5 + /* hdp invalidate */
6353 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6354 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6355 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6356 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6357 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6358 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6359 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6360 	.test_ring = gfx_v9_0_ring_test_ring,
6361 	.insert_nop = amdgpu_ring_insert_nop,
6362 	.pad_ib = amdgpu_ring_generic_pad_ib,
6363 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6364 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6365 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6366 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6367 };
6368 
6369 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6370 {
6371 	int i;
6372 
6373 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6374 
6375 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6376 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6377 
6378 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6379 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6380 }
6381 
6382 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6383 	.set = gfx_v9_0_set_eop_interrupt_state,
6384 	.process = gfx_v9_0_eop_irq,
6385 };
6386 
6387 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6388 	.set = gfx_v9_0_set_priv_reg_fault_state,
6389 	.process = gfx_v9_0_priv_reg_irq,
6390 };
6391 
6392 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6393 	.set = gfx_v9_0_set_priv_inst_fault_state,
6394 	.process = gfx_v9_0_priv_inst_irq,
6395 };
6396 
6397 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6398 	.set = gfx_v9_0_set_cp_ecc_error_state,
6399 	.process = amdgpu_gfx_cp_ecc_error_irq,
6400 };
6401 
6402 
6403 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6404 {
6405 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6406 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6407 
6408 	adev->gfx.priv_reg_irq.num_types = 1;
6409 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6410 
6411 	adev->gfx.priv_inst_irq.num_types = 1;
6412 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6413 
6414 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6415 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6416 }
6417 
6418 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6419 {
6420 	switch (adev->asic_type) {
6421 	case CHIP_VEGA10:
6422 	case CHIP_VEGA12:
6423 	case CHIP_VEGA20:
6424 	case CHIP_RAVEN:
6425 	case CHIP_ARCTURUS:
6426 	case CHIP_RENOIR:
6427 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6428 		break;
6429 	default:
6430 		break;
6431 	}
6432 }
6433 
6434 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6435 {
6436 	/* init asic gds info */
6437 	switch (adev->asic_type) {
6438 	case CHIP_VEGA10:
6439 	case CHIP_VEGA12:
6440 	case CHIP_VEGA20:
6441 		adev->gds.gds_size = 0x10000;
6442 		break;
6443 	case CHIP_RAVEN:
6444 	case CHIP_ARCTURUS:
6445 		adev->gds.gds_size = 0x1000;
6446 		break;
6447 	default:
6448 		adev->gds.gds_size = 0x10000;
6449 		break;
6450 	}
6451 
6452 	switch (adev->asic_type) {
6453 	case CHIP_VEGA10:
6454 	case CHIP_VEGA20:
6455 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6456 		break;
6457 	case CHIP_VEGA12:
6458 		adev->gds.gds_compute_max_wave_id = 0x27f;
6459 		break;
6460 	case CHIP_RAVEN:
6461 		if (adev->rev_id >= 0x8)
6462 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6463 		else
6464 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6465 		break;
6466 	case CHIP_ARCTURUS:
6467 		adev->gds.gds_compute_max_wave_id = 0xfff;
6468 		break;
6469 	default:
6470 		/* this really depends on the chip */
6471 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6472 		break;
6473 	}
6474 
6475 	adev->gds.gws_size = 64;
6476 	adev->gds.oa_size = 16;
6477 }
6478 
6479 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6480 						 u32 bitmap)
6481 {
6482 	u32 data;
6483 
6484 	if (!bitmap)
6485 		return;
6486 
6487 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6488 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6489 
6490 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6491 }
6492 
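/* A CU is reported active unless it is flagged inactive in either the
 * hardware (CC_GC_SHADER_ARRAY_CONFIG) or the user
 * (GC_USER_SHADER_ARRAY_CONFIG) bitmap.
 */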
6493 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6494 {
6495 	u32 data, mask;
6496 
6497 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6498 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6499 
6500 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6501 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6502 
6503 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6504 
6505 	return (~data) & mask;
6506 }
6507 
6508 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6509 				 struct amdgpu_cu_info *cu_info)
6510 {
6511 	int i, j, k, counter, active_cu_number = 0;
6512 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6513 	unsigned disable_masks[4 * 4];
6514 
6515 	if (!adev || !cu_info)
6516 		return -EINVAL;
6517 
6518 	/*
6519 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6520 	 */
6521 	if (adev->gfx.config.max_shader_engines *
6522 		adev->gfx.config.max_sh_per_se > 16)
6523 		return -EINVAL;
6524 
6525 	amdgpu_gfx_parse_disable_cu(disable_masks,
6526 				    adev->gfx.config.max_shader_engines,
6527 				    adev->gfx.config.max_sh_per_se);
6528 
6529 	mutex_lock(&adev->grbm_idx_mutex);
6530 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6531 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6532 			mask = 1;
6533 			ao_bitmap = 0;
6534 			counter = 0;
6535 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6536 			gfx_v9_0_set_user_cu_inactive_bitmap(
6537 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6538 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6539 
6540 			/*
6541 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6542 			 * is a 4x4 array, which suits Vega ASICs with their
6543 			 * 4*2 SE/SH layout.
6544 			 * Arcturus, however, uses an 8*1 SE/SH layout.
6545 			 * To minimize the impact, its entries are folded into
6546 			 * the existing bitmap array as below:
6547 			 *    SE4,SH0 --> bitmap[0][1]
6548 			 *    SE5,SH0 --> bitmap[1][1]
6549 			 *    SE6,SH0 --> bitmap[2][1]
6550 			 *    SE7,SH0 --> bitmap[3][1]
6551 			 */
6552 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6553 
6554 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6555 				if (bitmap & mask) {
6556 					if (counter < adev->gfx.config.max_cu_per_sh)
6557 						ao_bitmap |= mask;
6558 					counter++;
6559 				}
6560 				mask <<= 1;
6561 			}
6562 			active_cu_number += counter;
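			/* ao_cu_mask is only 32 bits, so it can record at most
			 * the first 2 SEs x 2 SHs (16 bits per SE, 8 per SH).
			 */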
6563 			if (i < 2 && j < 2)
6564 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6565 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6566 		}
6567 	}
6568 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6569 	mutex_unlock(&adev->grbm_idx_mutex);
6570 
6571 	cu_info->number = active_cu_number;
6572 	cu_info->ao_cu_mask = ao_cu_mask;
6573 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6574 
6575 	return 0;
6576 }
6577 
6578 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6579 {
6580 	.type = AMD_IP_BLOCK_TYPE_GFX,
6581 	.major = 9,
6582 	.minor = 0,
6583 	.rev = 0,
6584 	.funcs = &gfx_v9_0_ip_funcs,
6585 };
6586