1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
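/*
 * GFX sub-block indices as understood by the RAS TA.  The *_INDEX_START and
 * *_INDEX_END entries are aliases that bracket the range of sub-blocks
 * belonging to each hardware block (CPC, CPF, CPG, GDS, ...).
 */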
134 enum ta_ras_gfx_subblock {
135 	/*CPC*/
136 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPC_UCODE,
139 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	/* CPF*/
147 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 	TA_RAS_BLOCK__GFX_CPF_TAG,
151 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 	/* CPG*/
153 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 	TA_RAS_BLOCK__GFX_CPG_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 	/* GDS*/
159 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	/* SPI*/
167 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 	/* SQ*/
169 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 	/* SQC (3 ranges)*/
176 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 	/* SQC range 0*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	/* SQC range 1*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	/* SQC range 2*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 	/* TA*/
219 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	/* TCA*/
227 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	/* TCC (5 sub-ranges)*/
232 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 	/* TCC range 0*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	/* TCC range 1*/
245 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	/* TCC range 2*/
251 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	/* TCC range 3*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	/* TCC range 4*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 	/* TCI*/
277 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 	/* TCP*/
279 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	/* TD*/
289 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	/* EA (3 sub-ranges)*/
295 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 	/* EA range 0*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	/* EA range 1*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	/* EA range 2*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 	/* UTC VM L2 bank*/
326 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 	/* UTC VM walker*/
328 	TA_RAS_BLOCK__UTC_VML2_WALKER,
329 	/* UTC ATC L2 2MB cache*/
330 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 	/* UTC ATC L2 4KB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 	TA_RAS_BLOCK__GFX_MAX
334 };
335 
336 struct ras_gfx_subblock {
337 	unsigned char *name;
338 	int ta_subblock;
339 	int hw_supported_error_type;
340 	int sw_supported_error_type;
341 };
342 
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345 		#subblock,                                                     \
346 		TA_RAS_BLOCK__##subblock,                                      \
347 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349 	}
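/*
 * Arguments (a..d) pack into hw_supported_error_type and (e..h) into
 * sw_supported_error_type.  The bit positions appear to follow the
 * amdgpu_ras_error_type flags (parity, single-correctable,
 * multi-uncorrectable, poison); note the sw mask is assembled in the
 * order g, e, h, f.
 */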
350 
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369 			     0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 			     0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382 			     0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 			     0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 			     0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392 			     1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 			     0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436 			     1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457 			     0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 			     0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464 			     0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500 
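/*
 * Golden register settings, applied via soc15_program_register_sequence().
 * Each entry names a register together with a field mask and the value to
 * program into the masked bits (a full 0xffffffff mask presumably replaces
 * the whole register).
 */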
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
523 };
524 
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546 
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561 
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
588 };
589 
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600 
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623 
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646 
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666 
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
682 };
683 
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
695 };
696 
697 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
698 {
699 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
700 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 };
708 
709 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
710 {
711 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
712 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 };
720 
721 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
722 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
723 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
724 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
725 
726 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
727 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
729 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
730 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
731                                  struct amdgpu_cu_info *cu_info);
732 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
733 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
734 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
735 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
736 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
737 					  void *ras_error_status);
738 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
739 				     void *inject_if);
740 
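/*
 * Program the per-ASIC golden register settings, plus the common GFX9
 * settings for everything except Arcturus and Renoir.
 */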
741 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
742 {
743 	switch (adev->asic_type) {
744 	case CHIP_VEGA10:
745 		soc15_program_register_sequence(adev,
746 						golden_settings_gc_9_0,
747 						ARRAY_SIZE(golden_settings_gc_9_0));
748 		soc15_program_register_sequence(adev,
749 						golden_settings_gc_9_0_vg10,
750 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
751 		break;
752 	case CHIP_VEGA12:
753 		soc15_program_register_sequence(adev,
754 						golden_settings_gc_9_2_1,
755 						ARRAY_SIZE(golden_settings_gc_9_2_1));
756 		soc15_program_register_sequence(adev,
757 						golden_settings_gc_9_2_1_vg12,
758 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
759 		break;
760 	case CHIP_VEGA20:
761 		soc15_program_register_sequence(adev,
762 						golden_settings_gc_9_0,
763 						ARRAY_SIZE(golden_settings_gc_9_0));
764 		soc15_program_register_sequence(adev,
765 						golden_settings_gc_9_0_vg20,
766 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
767 		break;
768 	case CHIP_ARCTURUS:
769 		soc15_program_register_sequence(adev,
770 						golden_settings_gc_9_4_1_arct,
771 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
772 		break;
773 	case CHIP_RAVEN:
774 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
775 						ARRAY_SIZE(golden_settings_gc_9_1));
776 		if (adev->rev_id >= 8)
777 			soc15_program_register_sequence(adev,
778 							golden_settings_gc_9_1_rv2,
779 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
780 		else
781 			soc15_program_register_sequence(adev,
782 							golden_settings_gc_9_1_rv1,
783 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
784 		break;
785 	case CHIP_RENOIR:
786 		soc15_program_register_sequence(adev,
787 						golden_settings_gc_9_1_rn,
788 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
789 		return; /* Renoir does not need the common golden settings */
790 	default:
791 		break;
792 	}
793 
794 	if (adev->asic_type != CHIP_ARCTURUS)
795 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
796 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
797 }
798 
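/* Set up the pool of CP scratch registers used by the ring tests. */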
799 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
800 {
801 	adev->gfx.scratch.num_reg = 8;
802 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
803 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
804 }
805 
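/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the
 * selected engine, optionally requesting write confirmation (@wc).
 */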
806 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
807 				       bool wc, uint32_t reg, uint32_t val)
808 {
809 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
810 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
811 				WRITE_DATA_DST_SEL(0) |
812 				(wc ? WR_CONFIRM : 0));
813 	amdgpu_ring_write(ring, reg);
814 	amdgpu_ring_write(ring, 0);
815 	amdgpu_ring_write(ring, val);
816 }
817 
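/*
 * Emit a WAIT_REG_MEM packet: poll a register (or a memory location when
 * @mem_space is set) until the masked value equals @ref, checking at the
 * @inv poll interval.
 */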
818 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
819 				  int mem_space, int opt, uint32_t addr0,
820 				  uint32_t addr1, uint32_t ref, uint32_t mask,
821 				  uint32_t inv)
822 {
823 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
824 	amdgpu_ring_write(ring,
825 				 /* memory (1) or register (0) */
826 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
827 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
828 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
829 				 WAIT_REG_MEM_ENGINE(eng_sel)));
830 
831 	if (mem_space)
832 		BUG_ON(addr0 & 0x3); /* Dword align */
833 	amdgpu_ring_write(ring, addr0);
834 	amdgpu_ring_write(ring, addr1);
835 	amdgpu_ring_write(ring, ref);
836 	amdgpu_ring_write(ring, mask);
837 	amdgpu_ring_write(ring, inv); /* poll interval */
838 }
839 
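/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until the
 * new value shows up or the timeout expires.
 */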
840 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
841 {
842 	struct amdgpu_device *adev = ring->adev;
843 	uint32_t scratch;
844 	uint32_t tmp = 0;
845 	unsigned i;
846 	int r;
847 
848 	r = amdgpu_gfx_scratch_get(adev, &scratch);
849 	if (r)
850 		return r;
851 
852 	WREG32(scratch, 0xCAFEDEAD);
853 	r = amdgpu_ring_alloc(ring, 3);
854 	if (r)
855 		goto error_free_scratch;
856 
857 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
858 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
859 	amdgpu_ring_write(ring, 0xDEADBEEF);
860 	amdgpu_ring_commit(ring);
861 
862 	for (i = 0; i < adev->usec_timeout; i++) {
863 		tmp = RREG32(scratch);
864 		if (tmp == 0xDEADBEEF)
865 			break;
866 		udelay(1);
867 	}
868 
869 	if (i >= adev->usec_timeout)
870 		r = -ETIMEDOUT;
871 
872 error_free_scratch:
873 	amdgpu_gfx_scratch_free(adev, scratch);
874 	return r;
875 }
876 
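/*
 * Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * writeback slot, wait for its fence, and check that the value landed.
 */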
877 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
878 {
879 	struct amdgpu_device *adev = ring->adev;
880 	struct amdgpu_ib ib;
881 	struct dma_fence *f = NULL;
882 
883 	unsigned index;
884 	uint64_t gpu_addr;
885 	uint32_t tmp;
886 	long r;
887 
888 	r = amdgpu_device_wb_get(adev, &index);
889 	if (r)
890 		return r;
891 
892 	gpu_addr = adev->wb.gpu_addr + (index * 4);
893 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
894 	memset(&ib, 0, sizeof(ib));
895 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
896 	if (r)
897 		goto err1;
898 
899 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
900 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
901 	ib.ptr[2] = lower_32_bits(gpu_addr);
902 	ib.ptr[3] = upper_32_bits(gpu_addr);
903 	ib.ptr[4] = 0xDEADBEEF;
904 	ib.length_dw = 5;
905 
906 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
907 	if (r)
908 		goto err2;
909 
910 	r = dma_fence_wait_timeout(f, false, timeout);
911 	if (r == 0) {
912 		r = -ETIMEDOUT;
913 		goto err2;
914 	} else if (r < 0) {
915 		goto err2;
916 	}
917 
918 	tmp = adev->wb.wb[index];
919 	if (tmp == 0xDEADBEEF)
920 		r = 0;
921 	else
922 		r = -EINVAL;
923 
924 err2:
925 	amdgpu_ib_free(adev, &ib, NULL);
926 	dma_fence_put(f);
927 err1:
928 	amdgpu_device_wb_free(adev, index);
929 	return r;
930 }
931 
932 
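/* Release all CP/RLC firmware images and the RLC register list buffer. */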
933 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
934 {
935 	release_firmware(adev->gfx.pfp_fw);
936 	adev->gfx.pfp_fw = NULL;
937 	release_firmware(adev->gfx.me_fw);
938 	adev->gfx.me_fw = NULL;
939 	release_firmware(adev->gfx.ce_fw);
940 	adev->gfx.ce_fw = NULL;
941 	release_firmware(adev->gfx.rlc_fw);
942 	adev->gfx.rlc_fw = NULL;
943 	release_firmware(adev->gfx.mec_fw);
944 	adev->gfx.mec_fw = NULL;
945 	release_firmware(adev->gfx.mec2_fw);
946 	adev->gfx.mec2_fw = NULL;
947 
948 	kfree(adev->gfx.rlc.register_list_format);
949 }
950 
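/*
 * Parse the save/restore list blobs (cntl, gpm, srm) out of a v2.1 RLC
 * firmware header.
 */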
951 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
952 {
953 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
954 
955 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
956 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
957 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
958 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
959 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
960 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
961 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
962 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
963 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
964 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
965 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
966 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
967 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
968 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
969 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
970 }
971 
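/*
 * Decide whether the CP ME/MEC firmware is new enough to handle register
 * write-followed-by-wait in firmware; warn once if it looks too old.
 */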
972 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
973 {
974 	adev->gfx.me_fw_write_wait = false;
975 	adev->gfx.mec_fw_write_wait = false;
976 
977 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
978 	    (adev->gfx.mec_feature_version < 46) ||
979 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
980 	    (adev->gfx.pfp_feature_version < 46))
981 		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
982 			      "GRBM requires 1-cycle delay in cp firmware\n");
983 
984 	switch (adev->asic_type) {
985 	case CHIP_VEGA10:
986 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
987 		    (adev->gfx.me_feature_version >= 42) &&
988 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
989 		    (adev->gfx.pfp_feature_version >= 42))
990 			adev->gfx.me_fw_write_wait = true;
991 
992 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
993 		    (adev->gfx.mec_feature_version >= 42))
994 			adev->gfx.mec_fw_write_wait = true;
995 		break;
996 	case CHIP_VEGA12:
997 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
998 		    (adev->gfx.me_feature_version >= 44) &&
999 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1000 		    (adev->gfx.pfp_feature_version >= 44))
1001 			adev->gfx.me_fw_write_wait = true;
1002 
1003 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1004 		    (adev->gfx.mec_feature_version >= 44))
1005 			adev->gfx.mec_fw_write_wait = true;
1006 		break;
1007 	case CHIP_VEGA20:
1008 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1009 		    (adev->gfx.me_feature_version >= 44) &&
1010 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1011 		    (adev->gfx.pfp_feature_version >= 44))
1012 			adev->gfx.me_fw_write_wait = true;
1013 
1014 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1015 		    (adev->gfx.mec_feature_version >= 44))
1016 			adev->gfx.mec_fw_write_wait = true;
1017 		break;
1018 	case CHIP_RAVEN:
1019 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1020 		    (adev->gfx.me_feature_version >= 42) &&
1021 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1022 		    (adev->gfx.pfp_feature_version >= 42))
1023 			adev->gfx.me_fw_write_wait = true;
1024 
1025 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1026 		    (adev->gfx.mec_feature_version >= 42))
1027 			adev->gfx.mec_fw_write_wait = true;
1028 		break;
1029 	default:
1030 		break;
1031 	}
1032 }
1033 
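/*
 * Disable GFXOFF on Raven parts where the SBIOS/RLC firmware combination is
 * known to be unstable; otherwise enable the GFX powergating flags that
 * GFXOFF depends on.
 */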
1034 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1035 {
1036 	switch (adev->asic_type) {
1037 	case CHIP_VEGA10:
1038 	case CHIP_VEGA12:
1039 	case CHIP_VEGA20:
1040 		break;
1041 	case CHIP_RAVEN:
1042 		/* Disable GFXOFF on original raven.  There are combinations
1043 		 * of sbios and platforms that are not stable.
1044 		 */
1045 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
1046 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1047 		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1048 			 && ((adev->gfx.rlc_fw_version != 106 &&
1049 			     adev->gfx.rlc_fw_version < 531) ||
1050 			    (adev->gfx.rlc_fw_version == 53815) ||
1051 			    (adev->gfx.rlc_feature_version < 1) ||
1052 			    !adev->gfx.rlc.is_rlc_v2_1))
1053 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1054 
1055 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1056 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1057 				AMD_PG_SUPPORT_CP |
1058 				AMD_PG_SUPPORT_RLC_SMU_HS;
1059 		break;
1060 	case CHIP_RENOIR:
1061 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1062 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1063 				AMD_PG_SUPPORT_CP |
1064 				AMD_PG_SUPPORT_RLC_SMU_HS;
1065 		break;
1066 	default:
1067 		break;
1068 	}
1069 }
1070 
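/*
 * Request and validate the PFP, ME and CE microcode for @chip_name, and
 * account for it in the PSP front-door loading list when PSP loading is
 * in use.
 */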
1071 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1072 					  const char *chip_name)
1073 {
1074 	char fw_name[30];
1075 	int err;
1076 	struct amdgpu_firmware_info *info = NULL;
1077 	const struct common_firmware_header *header = NULL;
1078 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1079 
1080 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1081 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1082 	if (err)
1083 		goto out;
1084 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1085 	if (err)
1086 		goto out;
1087 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1088 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1089 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1090 
1091 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1092 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1093 	if (err)
1094 		goto out;
1095 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1096 	if (err)
1097 		goto out;
1098 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1099 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1100 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1101 
1102 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1103 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1104 	if (err)
1105 		goto out;
1106 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1107 	if (err)
1108 		goto out;
1109 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1110 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1111 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1112 
1113 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1114 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1115 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1116 		info->fw = adev->gfx.pfp_fw;
1117 		header = (const struct common_firmware_header *)info->fw->data;
1118 		adev->firmware.fw_size +=
1119 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1120 
1121 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1122 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1123 		info->fw = adev->gfx.me_fw;
1124 		header = (const struct common_firmware_header *)info->fw->data;
1125 		adev->firmware.fw_size +=
1126 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1127 
1128 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1129 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1130 		info->fw = adev->gfx.ce_fw;
1131 		header = (const struct common_firmware_header *)info->fw->data;
1132 		adev->firmware.fw_size +=
1133 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1134 	}
1135 
1136 out:
1137 	if (err) {
1138 		dev_err(adev->dev,
1139 			"gfx9: Failed to load firmware \"%s\"\n",
1140 			fw_name);
1141 		release_firmware(adev->gfx.pfp_fw);
1142 		adev->gfx.pfp_fw = NULL;
1143 		release_firmware(adev->gfx.me_fw);
1144 		adev->gfx.me_fw = NULL;
1145 		release_firmware(adev->gfx.ce_fw);
1146 		adev->gfx.ce_fw = NULL;
1147 	}
1148 	return err;
1149 }
1150 
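/*
 * Request and validate the RLC microcode, picking the AM4 or "kicker"
 * variants where required, then parse the register lists and (for v2.1
 * headers) the save/restore lists out of the image.
 */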
1151 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1152 					  const char *chip_name)
1153 {
1154 	char fw_name[30];
1155 	int err;
1156 	struct amdgpu_firmware_info *info = NULL;
1157 	const struct common_firmware_header *header = NULL;
1158 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1159 	unsigned int *tmp = NULL;
1160 	unsigned int i = 0;
1161 	uint16_t version_major;
1162 	uint16_t version_minor;
1163 	uint32_t smu_version;
1164 
1165 	/*
1166 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1167 	 * instead of picasso_rlc.bin.
1168 	 * Detection:
1169 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1170 	 *          or revision >= 0xD8 && revision <= 0xDF
1171 	 * otherwise it is PCO FP5
1172 	 */
1173 	if (!strcmp(chip_name, "picasso") &&
1174 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1175 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1176 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1177 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1178 		(smu_version >= 0x41e2b))
1179 		/*
1180 		 * SMC is loaded by SBIOS on APU, so the SMU version can be read directly.
1181 		 */
1182 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1183 	else
1184 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1185 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1186 	if (err)
1187 		goto out;
1188 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1189 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1190 
1191 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1192 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1193 	if (version_major == 2 && version_minor == 1)
1194 		adev->gfx.rlc.is_rlc_v2_1 = true;
1195 
1196 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1197 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1198 	adev->gfx.rlc.save_and_restore_offset =
1199 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1200 	adev->gfx.rlc.clear_state_descriptor_offset =
1201 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1202 	adev->gfx.rlc.avail_scratch_ram_locations =
1203 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1204 	adev->gfx.rlc.reg_restore_list_size =
1205 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1206 	adev->gfx.rlc.reg_list_format_start =
1207 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1208 	adev->gfx.rlc.reg_list_format_separate_start =
1209 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1210 	adev->gfx.rlc.starting_offsets_start =
1211 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1212 	adev->gfx.rlc.reg_list_format_size_bytes =
1213 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1214 	adev->gfx.rlc.reg_list_size_bytes =
1215 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1216 	adev->gfx.rlc.register_list_format =
1217 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1218 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1219 	if (!adev->gfx.rlc.register_list_format) {
1220 		err = -ENOMEM;
1221 		goto out;
1222 	}
1223 
1224 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1225 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1226 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1227 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1228 
1229 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1230 
1231 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1232 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1233 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1234 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1235 
1236 	if (adev->gfx.rlc.is_rlc_v2_1)
1237 		gfx_v9_0_init_rlc_ext_microcode(adev);
1238 
1239 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1240 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1241 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1242 		info->fw = adev->gfx.rlc_fw;
1243 		header = (const struct common_firmware_header *)info->fw->data;
1244 		adev->firmware.fw_size +=
1245 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1246 
1247 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1248 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1249 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1250 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1251 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1252 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1253 			info->fw = adev->gfx.rlc_fw;
1254 			adev->firmware.fw_size +=
1255 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1256 
1257 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1258 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1259 			info->fw = adev->gfx.rlc_fw;
1260 			adev->firmware.fw_size +=
1261 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1262 
1263 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1264 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1265 			info->fw = adev->gfx.rlc_fw;
1266 			adev->firmware.fw_size +=
1267 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1268 		}
1269 	}
1270 
1271 out:
1272 	if (err) {
1273 		dev_err(adev->dev,
1274 			"gfx9: Failed to load firmware \"%s\"\n",
1275 			fw_name);
1276 		release_firmware(adev->gfx.rlc_fw);
1277 		adev->gfx.rlc_fw = NULL;
1278 	}
1279 	return err;
1280 }
1281 
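/*
 * gfx_v9_0_init_cp_compute_microcode - fetch the MEC compute firmware.
 *
 * MEC1 firmware is required; MEC2 is optional and quietly skipped when the
 * binary is absent. For PSP-based loading, the MEC ucode and its jump table
 * are registered as separate regions (the MEC2 JT is skipped on Arcturus
 * and Renoir).
 */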
1282 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1283 					  const char *chip_name)
1284 {
1285 	char fw_name[30];
1286 	int err;
1287 	struct amdgpu_firmware_info *info = NULL;
1288 	const struct common_firmware_header *header = NULL;
1289 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1290 
1291 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1292 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1293 	if (err)
1294 		goto out;
1295 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1296 	if (err)
1297 		goto out;
1298 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1299 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1300 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1301 
1302 
1303 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1304 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1305 	if (!err) {
1306 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1307 		if (err)
1308 			goto out;
1309 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1310 		adev->gfx.mec2_fw->data;
1311 		adev->gfx.mec2_fw_version =
1312 		le32_to_cpu(cp_hdr->header.ucode_version);
1313 		adev->gfx.mec2_feature_version =
1314 		le32_to_cpu(cp_hdr->ucode_feature_version);
1315 	} else {
1316 		err = 0;
1317 		adev->gfx.mec2_fw = NULL;
1318 	}
1319 
1320 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1321 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1322 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1323 		info->fw = adev->gfx.mec_fw;
1324 		header = (const struct common_firmware_header *)info->fw->data;
1325 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1326 		adev->firmware.fw_size +=
1327 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1328 
1329 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1330 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1331 		info->fw = adev->gfx.mec_fw;
1332 		adev->firmware.fw_size +=
1333 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1334 
1335 		if (adev->gfx.mec2_fw) {
1336 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1337 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1338 			info->fw = adev->gfx.mec2_fw;
1339 			header = (const struct common_firmware_header *)info->fw->data;
1340 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1341 			adev->firmware.fw_size +=
1342 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1343 
1344 			/* TODO: Determine if MEC2 JT FW loading can be removed
1345 			 * for all GFX v9 ASICs and above */
1346 			if (adev->asic_type != CHIP_ARCTURUS &&
1347 			    adev->asic_type != CHIP_RENOIR) {
1348 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1349 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1350 				info->fw = adev->gfx.mec2_fw;
1351 				adev->firmware.fw_size +=
1352 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1353 					PAGE_SIZE);
1354 			}
1355 		}
1356 	}
1357 
1358 out:
1359 	gfx_v9_0_check_if_need_gfxoff(adev);
1360 	gfx_v9_0_check_fw_write_wait(adev);
1361 	if (err) {
1362 		dev_err(adev->dev,
1363 			"gfx9: Failed to load firmware \"%s\"\n",
1364 			fw_name);
1365 		release_firmware(adev->gfx.mec_fw);
1366 		adev->gfx.mec_fw = NULL;
1367 		release_firmware(adev->gfx.mec2_fw);
1368 		adev->gfx.mec2_fw = NULL;
1369 	}
1370 	return err;
1371 }
1372 
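/* Top-level firmware fetch: map the ASIC type to a chip name, then load
 * the CP gfx microcode (skipped on Arcturus, which has no gfx pipeline),
 * the RLC microcode and the CP compute microcode.
 */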
1373 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1374 {
1375 	const char *chip_name;
1376 	int r;
1377 
1378 	DRM_DEBUG("\n");
1379 
1380 	switch (adev->asic_type) {
1381 	case CHIP_VEGA10:
1382 		chip_name = "vega10";
1383 		break;
1384 	case CHIP_VEGA12:
1385 		chip_name = "vega12";
1386 		break;
1387 	case CHIP_VEGA20:
1388 		chip_name = "vega20";
1389 		break;
1390 	case CHIP_RAVEN:
1391 		if (adev->rev_id >= 8)
1392 			chip_name = "raven2";
1393 		else if (adev->pdev->device == 0x15d8)
1394 			chip_name = "picasso";
1395 		else
1396 			chip_name = "raven";
1397 		break;
1398 	case CHIP_ARCTURUS:
1399 		chip_name = "arcturus";
1400 		break;
1401 	case CHIP_RENOIR:
1402 		chip_name = "renoir";
1403 		break;
1404 	default:
1405 		BUG();
1406 	}
1407 
1408 	/* No CPG in Arcturus */
1409 	if (adev->asic_type != CHIP_ARCTURUS) {
1410 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1411 		if (r)
1412 			return r;
1413 	}
1414 
1415 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1416 	if (r)
1417 		return r;
1418 
1419 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1420 	if (r)
1421 		return r;
1422 
1423 	return r;
1424 }
1425 
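/* Return the size, in dwords, of the clear-state buffer built from
 * gfx9_cs_data: begin/context-control packets, one SET_CONTEXT_REG packet
 * per extent, plus the end-of-clear-state and CLEAR_STATE packets.
 */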
1426 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1427 {
1428 	u32 count = 0;
1429 	const struct cs_section_def *sect = NULL;
1430 	const struct cs_extent_def *ext = NULL;
1431 
1432 	/* begin clear state */
1433 	count += 2;
1434 	/* context control state */
1435 	count += 3;
1436 
1437 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1438 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1439 			if (sect->id == SECT_CONTEXT)
1440 				count += 2 + ext->reg_count;
1441 			else
1442 				return 0;
1443 		}
1444 	}
1445 
1446 	/* end clear state */
1447 	count += 2;
1448 	/* clear state */
1449 	count += 2;
1450 
1451 	return count;
1452 }
1453 
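/* Emit the clear-state indirect buffer: preamble and context-control
 * packets, SET_CONTEXT_REG packets for every SECT_CONTEXT extent in
 * cs_data, and a trailing CLEAR_STATE packet. The layout must match
 * gfx_v9_0_get_csb_size().
 */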
1454 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1455 				    volatile u32 *buffer)
1456 {
1457 	u32 count = 0, i;
1458 	const struct cs_section_def *sect = NULL;
1459 	const struct cs_extent_def *ext = NULL;
1460 
1461 	if (adev->gfx.rlc.cs_data == NULL)
1462 		return;
1463 	if (buffer == NULL)
1464 		return;
1465 
1466 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1467 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1468 
1469 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1470 	buffer[count++] = cpu_to_le32(0x80000000);
1471 	buffer[count++] = cpu_to_le32(0x80000000);
1472 
1473 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1474 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1475 			if (sect->id == SECT_CONTEXT) {
1476 				buffer[count++] =
1477 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1478 				buffer[count++] = cpu_to_le32(ext->reg_index -
1479 						PACKET3_SET_CONTEXT_REG_START);
1480 				for (i = 0; i < ext->reg_count; i++)
1481 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1482 			} else {
1483 				return;
1484 			}
1485 		}
1486 	}
1487 
1488 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1489 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1490 
1491 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1492 	buffer[count++] = cpu_to_le32(0);
1493 }
1494 
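/* Build the per-SE/SH always-on CU bitmaps used for RLC power gating.
 * The first two enabled CUs are also written to RLC_PG_ALWAYS_ON_CU_MASK;
 * APUs keep 4 CUs always on, Vega12 keeps 8 and other ASICs keep 12.
 */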
1495 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1496 {
1497 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1498 	uint32_t pg_always_on_cu_num = 2;
1499 	uint32_t always_on_cu_num;
1500 	uint32_t i, j, k;
1501 	uint32_t mask, cu_bitmap, counter;
1502 
1503 	if (adev->flags & AMD_IS_APU)
1504 		always_on_cu_num = 4;
1505 	else if (adev->asic_type == CHIP_VEGA12)
1506 		always_on_cu_num = 8;
1507 	else
1508 		always_on_cu_num = 12;
1509 
1510 	mutex_lock(&adev->grbm_idx_mutex);
1511 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1512 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1513 			mask = 1;
1514 			cu_bitmap = 0;
1515 			counter = 0;
1516 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1517 
1518 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1519 				if (cu_info->bitmap[i][j] & mask) {
1520 					if (counter == pg_always_on_cu_num)
1521 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1522 					if (counter < always_on_cu_num)
1523 						cu_bitmap |= mask;
1524 					else
1525 						break;
1526 					counter++;
1527 				}
1528 				mask <<= 1;
1529 			}
1530 
1531 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1532 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1533 		}
1534 	}
1535 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1536 	mutex_unlock(&adev->grbm_idx_mutex);
1537 }
1538 
1539 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1540 {
1541 	uint32_t data;
1542 
1543 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1544 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1545 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1546 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1547 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1548 
1549 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1550 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1551 
1552 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1553 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1554 
1555 	mutex_lock(&adev->grbm_idx_mutex);
1556 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1557 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1558 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1559 
1560 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1561 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1562 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1563 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1564 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1565 
1566 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1567 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1568 	data &= 0x0000FFFF;
1569 	data |= 0x00C00000;
1570 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1571 
1572 	/*
1573 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1574 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1575 	 */
1576 
1577 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1578 	 * but is used here for RLC_LB_CNTL configuration */
1579 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1580 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1581 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1582 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1583 	mutex_unlock(&adev->grbm_idx_mutex);
1584 
1585 	gfx_v9_0_init_always_on_cu_mask(adev);
1586 }
1587 
1588 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1589 {
1590 	uint32_t data;
1591 
1592 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1593 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1594 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1595 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1596 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1597 
1598 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1599 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1600 
1601 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1602 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1603 
1604 	mutex_lock(&adev->grbm_idx_mutex);
1605 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1606 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1607 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1608 
1609 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1610 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1611 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1612 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1613 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1614 
1615 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1616 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1617 	data &= 0x0000FFFF;
1618 	data |= 0x00C00000;
1619 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1620 
1621 	/*
1622 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1623 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1624 	 */
1625 
1626 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1627 	 * but is used here for RLC_LB_CNTL configuration */
1628 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1629 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1630 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1631 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1632 	mutex_unlock(&adev->grbm_idx_mutex);
1633 
1634 	gfx_v9_0_init_always_on_cu_mask(adev);
1635 }
1636 
1637 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1638 {
1639 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1640 }
1641 
1642 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1643 {
1644 	return 5;
1645 }
1646 
1647 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1648 {
1649 	const struct cs_section_def *cs_data;
1650 	int r;
1651 
1652 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1653 
1654 	cs_data = adev->gfx.rlc.cs_data;
1655 
1656 	if (cs_data) {
1657 		/* init clear state block */
1658 		r = amdgpu_gfx_rlc_init_csb(adev);
1659 		if (r)
1660 			return r;
1661 	}
1662 
1663 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1664 		/* TODO: double check the cp_table_size for RV */
1665 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1666 		r = amdgpu_gfx_rlc_init_cpt(adev);
1667 		if (r)
1668 			return r;
1669 	}
1670 
1671 	switch (adev->asic_type) {
1672 	case CHIP_RAVEN:
1673 		gfx_v9_0_init_lbpw(adev);
1674 		break;
1675 	case CHIP_VEGA20:
1676 		gfx_v9_4_init_lbpw(adev);
1677 		break;
1678 	default:
1679 		break;
1680 	}
1681 
1682 	return 0;
1683 }
1684 
1685 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1686 {
1687 	int r;
1688 
1689 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1690 	if (unlikely(r != 0))
1691 		return r;
1692 
1693 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1694 			AMDGPU_GEM_DOMAIN_VRAM);
1695 	if (!r)
1696 		adev->gfx.rlc.clear_state_gpu_addr =
1697 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1698 
1699 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1700 
1701 	return r;
1702 }
1703 
1704 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1705 {
1706 	int r;
1707 
1708 	if (!adev->gfx.rlc.clear_state_obj)
1709 		return;
1710 
1711 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1712 	if (likely(r == 0)) {
1713 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1714 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1715 	}
1716 }
1717 
1718 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1719 {
1720 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1721 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1722 }
1723 
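/* Allocate and zero the MEC HPD (EOP) buffer in VRAM, then copy the MEC
 * firmware image into a GTT buffer object so the CP can fetch it.
 */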
1724 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1725 {
1726 	int r;
1727 	u32 *hpd;
1728 	const __le32 *fw_data;
1729 	unsigned fw_size;
1730 	u32 *fw;
1731 	size_t mec_hpd_size;
1732 
1733 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1734 
1735 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1736 
1737 	/* take ownership of the relevant compute queues */
1738 	amdgpu_gfx_compute_queue_acquire(adev);
1739 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1740 
1741 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1742 				      AMDGPU_GEM_DOMAIN_VRAM,
1743 				      &adev->gfx.mec.hpd_eop_obj,
1744 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1745 				      (void **)&hpd);
1746 	if (r) {
1747 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1748 		gfx_v9_0_mec_fini(adev);
1749 		return r;
1750 	}
1751 
1752 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1753 
1754 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1755 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1756 
1757 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1758 
1759 	fw_data = (const __le32 *)
1760 		(adev->gfx.mec_fw->data +
1761 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1762 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1763 
1764 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1765 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1766 				      &adev->gfx.mec.mec_fw_obj,
1767 				      &adev->gfx.mec.mec_fw_gpu_addr,
1768 				      (void **)&fw);
1769 	if (r) {
1770 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1771 		gfx_v9_0_mec_fini(adev);
1772 		return r;
1773 	}
1774 
1775 	memcpy(fw, fw_data, fw_size);
1776 
1777 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1778 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1779 
1780 	return 0;
1781 }
1782 
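/* Wave debug helpers: read SQ per-wave state through the SQ_IND_INDEX /
 * SQ_IND_DATA indirect register pair, either one register at a time
 * (wave_read_ind) or a consecutive range using auto-increment
 * (wave_read_regs).
 */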
1783 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1784 {
1785 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1786 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1787 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1788 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1789 		(SQ_IND_INDEX__FORCE_READ_MASK));
1790 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1791 }
1792 
1793 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1794 			   uint32_t wave, uint32_t thread,
1795 			   uint32_t regno, uint32_t num, uint32_t *out)
1796 {
1797 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1798 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1799 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1800 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1801 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1802 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1803 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1804 	while (num--)
1805 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1806 }
1807 
1808 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1809 {
1810 	/* type 1 wave data */
1811 	dst[(*no_fields)++] = 1;
1812 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1813 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1814 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1815 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1816 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1817 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1818 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1819 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1820 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1821 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1822 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1823 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1824 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1825 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1826 }
1827 
1828 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1829 				     uint32_t wave, uint32_t start,
1830 				     uint32_t size, uint32_t *dst)
1831 {
1832 	wave_read_regs(
1833 		adev, simd, wave, 0,
1834 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1835 }
1836 
1837 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1838 				     uint32_t wave, uint32_t thread,
1839 				     uint32_t start, uint32_t size,
1840 				     uint32_t *dst)
1841 {
1842 	wave_read_regs(
1843 		adev, simd, wave, thread,
1844 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1845 }
1846 
1847 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1848 				  u32 me, u32 pipe, u32 q, u32 vm)
1849 {
1850 	soc15_grbm_select(adev, me, pipe, q, vm);
1851 }
1852 
1853 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1854 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1855 	.select_se_sh = &gfx_v9_0_select_se_sh,
1856 	.read_wave_data = &gfx_v9_0_read_wave_data,
1857 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1858 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1859 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1860 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1861 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1862 };
1863 
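/* Set the per-ASIC gfx config defaults and GB_ADDR_CONFIG value, then
 * decode its fields (pipes, banks, RBs, SEs, max compressed fragments,
 * pipe interleave size) into adev->gfx.config.
 */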
1864 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1865 {
1866 	u32 gb_addr_config;
1867 	int err;
1868 
1869 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1870 
1871 	switch (adev->asic_type) {
1872 	case CHIP_VEGA10:
1873 		adev->gfx.config.max_hw_contexts = 8;
1874 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1878 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1879 		break;
1880 	case CHIP_VEGA12:
1881 		adev->gfx.config.max_hw_contexts = 8;
1882 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1886 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1887 		DRM_INFO("fix gfx.config for vega12\n");
1888 		break;
1889 	case CHIP_VEGA20:
1890 		adev->gfx.config.max_hw_contexts = 8;
1891 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1892 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1893 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1894 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1895 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1896 		gb_addr_config &= ~0xf3e777ff;
1897 		gb_addr_config |= 0x22014042;
1898 		/* check vbios table if gpu info is not available */
1899 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1900 		if (err)
1901 			return err;
1902 		break;
1903 	case CHIP_RAVEN:
1904 		adev->gfx.config.max_hw_contexts = 8;
1905 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1909 		if (adev->rev_id >= 8)
1910 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1911 		else
1912 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1913 		break;
1914 	case CHIP_ARCTURUS:
1915 		adev->gfx.config.max_hw_contexts = 8;
1916 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1917 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1918 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1919 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1920 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1921 		gb_addr_config &= ~0xf3e777ff;
1922 		gb_addr_config |= 0x22014042;
1923 		break;
1924 	case CHIP_RENOIR:
1925 		adev->gfx.config.max_hw_contexts = 8;
1926 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1927 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1928 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1929 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1930 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1931 		gb_addr_config &= ~0xf3e777ff;
1932 		gb_addr_config |= 0x22010042;
1933 		break;
1934 	default:
1935 		BUG();
1936 		break;
1937 	}
1938 
1939 	adev->gfx.config.gb_addr_config = gb_addr_config;
1940 
1941 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1942 			REG_GET_FIELD(
1943 					adev->gfx.config.gb_addr_config,
1944 					GB_ADDR_CONFIG,
1945 					NUM_PIPES);
1946 
1947 	adev->gfx.config.max_tile_pipes =
1948 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1949 
1950 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1951 			REG_GET_FIELD(
1952 					adev->gfx.config.gb_addr_config,
1953 					GB_ADDR_CONFIG,
1954 					NUM_BANKS);
1955 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1956 			REG_GET_FIELD(
1957 					adev->gfx.config.gb_addr_config,
1958 					GB_ADDR_CONFIG,
1959 					MAX_COMPRESSED_FRAGS);
1960 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1961 			REG_GET_FIELD(
1962 					adev->gfx.config.gb_addr_config,
1963 					GB_ADDR_CONFIG,
1964 					NUM_RB_PER_SE);
1965 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1966 			REG_GET_FIELD(
1967 					adev->gfx.config.gb_addr_config,
1968 					GB_ADDR_CONFIG,
1969 					NUM_SHADER_ENGINES);
1970 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1971 			REG_GET_FIELD(
1972 					adev->gfx.config.gb_addr_config,
1973 					GB_ADDR_CONFIG,
1974 					PIPE_INTERLEAVE_SIZE));
1975 
1976 	return 0;
1977 }
1978 
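/* Initialize one compute ring: map it onto a MEC/pipe/queue triple,
 * assign a doorbell and an EOP slot in the HPD buffer, and attach the
 * EOP interrupt source for that pipe.
 */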
1979 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1980 				      int mec, int pipe, int queue)
1981 {
1982 	int r;
1983 	unsigned irq_type;
1984 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1985 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1988 	/* mec0 is me1 */
1989 	ring->me = mec + 1;
1990 	ring->pipe = pipe;
1991 	ring->queue = queue;
1992 
1993 	ring->ring_obj = NULL;
1994 	ring->use_doorbell = true;
1995 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1996 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1997 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1998 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1999 
2000 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2001 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2002 		+ ring->pipe;
2003 
2004 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2005 	r = amdgpu_ring_init(adev, ring, 1024,
2006 			     &adev->gfx.eop_irq, irq_type);
2007 	if (r)
2008 		return r;
2009 
2010 
2011 	return 0;
2012 }
2013 
2014 static int gfx_v9_0_sw_init(void *handle)
2015 {
2016 	int i, j, k, r, ring_id;
2017 	struct amdgpu_ring *ring;
2018 	struct amdgpu_kiq *kiq;
2019 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2020 
2021 	switch (adev->asic_type) {
2022 	case CHIP_VEGA10:
2023 	case CHIP_VEGA12:
2024 	case CHIP_VEGA20:
2025 	case CHIP_RAVEN:
2026 	case CHIP_ARCTURUS:
2027 	case CHIP_RENOIR:
2028 		adev->gfx.mec.num_mec = 2;
2029 		break;
2030 	default:
2031 		adev->gfx.mec.num_mec = 1;
2032 		break;
2033 	}
2034 
2035 	adev->gfx.mec.num_pipe_per_mec = 4;
2036 	adev->gfx.mec.num_queue_per_pipe = 8;
2037 
2038 	/* EOP Event */
2039 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2040 	if (r)
2041 		return r;
2042 
2043 	/* Privileged reg */
2044 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2045 			      &adev->gfx.priv_reg_irq);
2046 	if (r)
2047 		return r;
2048 
2049 	/* Privileged inst */
2050 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2051 			      &adev->gfx.priv_inst_irq);
2052 	if (r)
2053 		return r;
2054 
2055 	/* ECC error */
2056 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2057 			      &adev->gfx.cp_ecc_error_irq);
2058 	if (r)
2059 		return r;
2060 
2061 	/* FUE error */
2062 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2063 			      &adev->gfx.cp_ecc_error_irq);
2064 	if (r)
2065 		return r;
2066 
2067 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2068 
2069 	gfx_v9_0_scratch_init(adev);
2070 
2071 	r = gfx_v9_0_init_microcode(adev);
2072 	if (r) {
2073 		DRM_ERROR("Failed to load gfx firmware!\n");
2074 		return r;
2075 	}
2076 
2077 	r = adev->gfx.rlc.funcs->init(adev);
2078 	if (r) {
2079 		DRM_ERROR("Failed to init rlc BOs!\n");
2080 		return r;
2081 	}
2082 
2083 	r = gfx_v9_0_mec_init(adev);
2084 	if (r) {
2085 		DRM_ERROR("Failed to init MEC BOs!\n");
2086 		return r;
2087 	}
2088 
2089 	/* set up the gfx ring */
2090 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2091 		ring = &adev->gfx.gfx_ring[i];
2092 		ring->ring_obj = NULL;
2093 		if (!i)
2094 			sprintf(ring->name, "gfx");
2095 		else
2096 			sprintf(ring->name, "gfx_%d", i);
2097 		ring->use_doorbell = true;
2098 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2099 		r = amdgpu_ring_init(adev, ring, 1024,
2100 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2101 		if (r)
2102 			return r;
2103 	}
2104 
2105 	/* set up the compute queues - allocate horizontally across pipes */
2106 	ring_id = 0;
2107 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2108 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2109 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2110 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2111 					continue;
2112 
2113 				r = gfx_v9_0_compute_ring_init(adev,
2114 							       ring_id,
2115 							       i, k, j);
2116 				if (r)
2117 					return r;
2118 
2119 				ring_id++;
2120 			}
2121 		}
2122 	}
2123 
2124 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2125 	if (r) {
2126 		DRM_ERROR("Failed to init KIQ BOs!\n");
2127 		return r;
2128 	}
2129 
2130 	kiq = &adev->gfx.kiq;
2131 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2132 	if (r)
2133 		return r;
2134 
2135 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2136 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2137 	if (r)
2138 		return r;
2139 
2140 	adev->gfx.ce_ram_size = 0x8000;
2141 
2142 	r = gfx_v9_0_gpu_early_init(adev);
2143 	if (r)
2144 		return r;
2145 
2146 	return 0;
2147 }
2148 
2149 
2150 static int gfx_v9_0_sw_fini(void *handle)
2151 {
2152 	int i;
2153 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2154 
2155 	amdgpu_gfx_ras_fini(adev);
2156 
2157 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2158 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2159 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2160 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2161 
2162 	amdgpu_gfx_mqd_sw_fini(adev);
2163 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2164 	amdgpu_gfx_kiq_fini(adev);
2165 
2166 	gfx_v9_0_mec_fini(adev);
2167 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2168 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2169 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2170 				&adev->gfx.rlc.cp_table_gpu_addr,
2171 				(void **)&adev->gfx.rlc.cp_table_ptr);
2172 	}
2173 	gfx_v9_0_free_microcode(adev);
2174 
2175 	return 0;
2176 }
2177 
2178 
2179 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2180 {
2181 	/* TODO */
2182 }
2183 
2184 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2185 {
2186 	u32 data;
2187 
2188 	if (instance == 0xffffffff)
2189 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2190 	else
2191 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2192 
2193 	if (se_num == 0xffffffff)
2194 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2195 	else
2196 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2197 
2198 	if (sh_num == 0xffffffff)
2199 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2200 	else
2201 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2202 
2203 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2204 }
2205 
2206 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2207 {
2208 	u32 data, mask;
2209 
2210 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2211 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2212 
2213 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2214 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2215 
2216 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2217 					 adev->gfx.config.max_sh_per_se);
2218 
2219 	return (~data) & mask;
2220 }
2221 
2222 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2223 {
2224 	int i, j;
2225 	u32 data;
2226 	u32 active_rbs = 0;
2227 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2228 					adev->gfx.config.max_sh_per_se;
2229 
2230 	mutex_lock(&adev->grbm_idx_mutex);
2231 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2232 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2233 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2234 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2235 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2236 					       rb_bitmap_width_per_sh);
2237 		}
2238 	}
2239 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2240 	mutex_unlock(&adev->grbm_idx_mutex);
2241 
2242 	adev->gfx.config.backend_enable_mask = active_rbs;
2243 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2244 }
2245 
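/* Compute VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 get fixed 64-bit
 * LDS/scratch/GPUVM apertures and start with no GDS, GWS or OA
 * allocation; firmware enables those later for the target VMIDs.
 */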
2246 #define DEFAULT_SH_MEM_BASES	(0x6000)
2247 #define FIRST_COMPUTE_VMID	(8)
2248 #define LAST_COMPUTE_VMID	(16)
2249 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2250 {
2251 	int i;
2252 	uint32_t sh_mem_config;
2253 	uint32_t sh_mem_bases;
2254 
2255 	/*
2256 	 * Configure apertures:
2257 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2258 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2259 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2260 	 */
2261 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2262 
2263 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2264 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2265 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2266 
2267 	mutex_lock(&adev->srbm_mutex);
2268 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2269 		soc15_grbm_select(adev, 0, 0, 0, i);
2270 		/* CP and shaders */
2271 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2272 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2273 	}
2274 	soc15_grbm_select(adev, 0, 0, 0, 0);
2275 	mutex_unlock(&adev->srbm_mutex);
2276 
2277 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2278 	 * access. These should be enabled by FW for target VMIDs. */
2279 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2280 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2281 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2282 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2283 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2284 	}
2285 }
2286 
2287 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2288 {
2289 	int vmid;
2290 
2291 	/*
2292 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2293 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2294 	 * the driver can enable them for graphics. VMID0 should maintain
2295 	 * access so that HWS firmware can save/restore entries.
2296 	 */
2297 	for (vmid = 1; vmid < 16; vmid++) {
2298 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2299 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2300 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2301 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2302 	}
2303 }
2304 
2305 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2306 {
2307 	u32 tmp;
2308 	int i;
2309 
2310 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2311 
2312 	gfx_v9_0_tiling_mode_table_init(adev);
2313 
2314 	gfx_v9_0_setup_rb(adev);
2315 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2316 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2317 
2318 	/* XXX SH_MEM regs */
2319 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2320 	mutex_lock(&adev->srbm_mutex);
2321 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2322 		soc15_grbm_select(adev, 0, 0, 0, i);
2323 		/* CP and shaders */
2324 		if (i == 0) {
2325 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2326 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2327 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2328 					    !!amdgpu_noretry);
2329 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2330 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2331 		} else {
2332 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2333 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2334 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2335 					    !!amdgpu_noretry);
2336 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2337 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2338 				(adev->gmc.private_aperture_start >> 48));
2339 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2340 				(adev->gmc.shared_aperture_start >> 48));
2341 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2342 		}
2343 	}
2344 	soc15_grbm_select(adev, 0, 0, 0, 0);
2345 
2346 	mutex_unlock(&adev->srbm_mutex);
2347 
2348 	gfx_v9_0_init_compute_vmid(adev);
2349 	gfx_v9_0_init_gds_vmid(adev);
2350 }
2351 
2352 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2353 {
2354 	u32 i, j, k;
2355 	u32 mask;
2356 
2357 	mutex_lock(&adev->grbm_idx_mutex);
2358 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2359 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2360 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2361 			for (k = 0; k < adev->usec_timeout; k++) {
2362 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2363 					break;
2364 				udelay(1);
2365 			}
2366 			if (k == adev->usec_timeout) {
2367 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2368 						      0xffffffff, 0xffffffff);
2369 				mutex_unlock(&adev->grbm_idx_mutex);
2370 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2371 					 i, j);
2372 				return;
2373 			}
2374 		}
2375 	}
2376 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2377 	mutex_unlock(&adev->grbm_idx_mutex);
2378 
2379 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2380 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2381 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2382 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2383 	for (k = 0; k < adev->usec_timeout; k++) {
2384 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2385 			break;
2386 		udelay(1);
2387 	}
2388 }
2389 
2390 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2391 					       bool enable)
2392 {
2393 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2394 
2395 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2396 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2397 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2398 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2399 
2400 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2401 }
2402 
2403 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2404 {
2405 	/* csib */
2406 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2407 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2408 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2409 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2410 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2411 			adev->gfx.rlc.clear_state_size);
2412 }
2413 
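/* Walk the RLC register_list_format blob from its indirect section:
 * record where each indirect block starts and collect the set of unique
 * indirect register offsets referenced by the list.
 */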
2414 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2415 				int indirect_offset,
2416 				int list_size,
2417 				int *unique_indirect_regs,
2418 				int unique_indirect_reg_count,
2419 				int *indirect_start_offsets,
2420 				int *indirect_start_offsets_count,
2421 				int max_start_offsets_count)
2422 {
2423 	int idx;
2424 
2425 	for (; indirect_offset < list_size; indirect_offset++) {
2426 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2427 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2428 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2429 
2430 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2431 			indirect_offset += 2;
2432 
2433 			/* look for the matching index */
2434 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2435 				if (unique_indirect_regs[idx] ==
2436 					register_list_format[indirect_offset] ||
2437 					!unique_indirect_regs[idx])
2438 					break;
2439 			}
2440 
2441 			BUG_ON(idx >= unique_indirect_reg_count);
2442 
2443 			if (!unique_indirect_regs[idx])
2444 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2445 
2446 			indirect_offset++;
2447 		}
2448 	}
2449 }
2450 
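/* Program the RLC save/restore machine from the parsed firmware lists:
 * the register_restore table goes into SRM ARAM, the direct and indirect
 * format lists plus the start offsets go into RLC GPM scratch, and each
 * unique indirect register is bound to an RLC_SRM_INDEX_CNTL_ADDR/DATA
 * register pair.
 */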
2451 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2452 {
2453 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2454 	int unique_indirect_reg_count = 0;
2455 
2456 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2457 	int indirect_start_offsets_count = 0;
2458 
2459 	int list_size = 0;
2460 	int i = 0, j = 0;
2461 	u32 tmp = 0;
2462 
2463 	u32 *register_list_format =
2464 		kmemdup(adev->gfx.rlc.register_list_format,
2465 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2466 	if (!register_list_format)
2467 		return -ENOMEM;
2468 
2469 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2470 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2471 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2472 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2473 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2474 				    unique_indirect_regs,
2475 				    unique_indirect_reg_count,
2476 				    indirect_start_offsets,
2477 				    &indirect_start_offsets_count,
2478 				    ARRAY_SIZE(indirect_start_offsets));
2479 
2480 	/* enable auto inc in case it is disabled */
2481 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2482 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2483 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2484 
2485 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2486 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2487 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2488 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2489 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2490 			adev->gfx.rlc.register_restore[i]);
2491 
2492 	/* load indirect register */
2493 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2494 		adev->gfx.rlc.reg_list_format_start);
2495 
2496 	/* direct register portion */
2497 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2498 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2499 			register_list_format[i]);
2500 
2501 	/* indirect register portion */
2502 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2503 		if (register_list_format[i] == 0xFFFFFFFF) {
2504 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2505 			continue;
2506 		}
2507 
2508 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2509 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2510 
2511 		for (j = 0; j < unique_indirect_reg_count; j++) {
2512 			if (register_list_format[i] == unique_indirect_regs[j]) {
2513 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2514 				break;
2515 			}
2516 		}
2517 
2518 		BUG_ON(j >= unique_indirect_reg_count);
2519 
2520 		i++;
2521 	}
2522 
2523 	/* set save/restore list size */
2524 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2525 	list_size = list_size >> 1;
2526 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2527 		adev->gfx.rlc.reg_restore_list_size);
2528 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2529 
2530 	/* write the starting offsets to RLC scratch ram */
2531 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2532 		adev->gfx.rlc.starting_offsets_start);
2533 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2534 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2535 		       indirect_start_offsets[i]);
2536 
2537 	/* load unique indirect regs*/
2538 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2539 		if (unique_indirect_regs[i] != 0) {
2540 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2541 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2542 			       unique_indirect_regs[i] & 0x3FFFF);
2543 
2544 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2545 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2546 			       unique_indirect_regs[i] >> 20);
2547 		}
2548 	}
2549 
2550 	kfree(register_list_format);
2551 	return 0;
2552 }
2553 
2554 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2555 {
2556 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2557 }
2558 
2559 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2560 					     bool enable)
2561 {
2562 	uint32_t data = 0;
2563 	uint32_t default_data = 0;
2564 
2565 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2566 	if (enable) {
2567 		/* enable GFXIP control over CGPG */
2568 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2569 		if(default_data != data)
2570 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2571 
2572 		/* update status */
2573 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2574 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2575 		if(default_data != data)
2576 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2577 	} else {
2578 		/* restore GFXIP control over CGPG */
2579 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2580 		if(default_data != data)
2581 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2582 	}
2583 }
2584 
2585 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2586 {
2587 	uint32_t data = 0;
2588 
2589 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2590 			      AMD_PG_SUPPORT_GFX_SMG |
2591 			      AMD_PG_SUPPORT_GFX_DMG)) {
2592 		/* init IDLE_POLL_COUNT = 60 */
2593 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2594 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2595 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2596 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2597 
2598 		/* init RLC PG Delay */
2599 		data = 0;
2600 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2601 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2602 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2603 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2604 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2605 
2606 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2607 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2608 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2609 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2610 
2611 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2612 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2613 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2614 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2615 
2616 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2617 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2618 
2619 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2620 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2621 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2622 
2623 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2624 	}
2625 }
2626 
2627 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2628 						bool enable)
2629 {
2630 	uint32_t data = 0;
2631 	uint32_t default_data = 0;
2632 
2633 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2634 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2635 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2636 			     enable ? 1 : 0);
2637 	if (default_data != data)
2638 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2639 }
2640 
2641 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2642 						bool enable)
2643 {
2644 	uint32_t data = 0;
2645 	uint32_t default_data = 0;
2646 
2647 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2648 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2649 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2650 			     enable ? 1 : 0);
2651 	if(default_data != data)
2652 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2653 }
2654 
2655 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2656 					bool enable)
2657 {
2658 	uint32_t data = 0;
2659 	uint32_t default_data = 0;
2660 
2661 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2662 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2663 			     CP_PG_DISABLE,
2664 			     enable ? 0 : 1);
2665 	if(default_data != data)
2666 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2667 }
2668 
2669 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2670 						bool enable)
2671 {
2672 	uint32_t data, default_data;
2673 
2674 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2675 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2676 			     GFX_POWER_GATING_ENABLE,
2677 			     enable ? 1 : 0);
2678 	if(default_data != data)
2679 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2680 }
2681 
2682 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2683 						bool enable)
2684 {
2685 	uint32_t data, default_data;
2686 
2687 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2688 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2689 			     GFX_PIPELINE_PG_ENABLE,
2690 			     enable ? 1 : 0);
2691 	if(default_data != data)
2692 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2693 
2694 	if (!enable)
2695 		/* read any GFX register to wake up GFX */
2696 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2697 }
2698 
2699 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2700 						       bool enable)
2701 {
2702 	uint32_t data, default_data;
2703 
2704 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2705 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2706 			     STATIC_PER_CU_PG_ENABLE,
2707 			     enable ? 1 : 0);
2708 	if(default_data != data)
2709 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2710 }
2711 
2712 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2713 						bool enable)
2714 {
2715 	uint32_t data, default_data;
2716 
2717 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2718 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2719 			     DYN_PER_CU_PG_ENABLE,
2720 			     enable ? 1 : 0);
2721 	if(default_data != data)
2722 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2723 }
2724 
2725 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2726 {
2727 	gfx_v9_0_init_csb(adev);
2728 
2729 	/*
2730 	 * Rlc save restore list is workable since v2_1.
2731 	 * And it's needed by gfxoff feature.
2732 	 */
2733 	if (adev->gfx.rlc.is_rlc_v2_1) {
2734 		if (adev->asic_type == CHIP_VEGA12 ||
2735 		    (adev->asic_type == CHIP_RAVEN &&
2736 		     adev->rev_id >= 8))
2737 			gfx_v9_1_init_rlc_save_restore_list(adev);
2738 		gfx_v9_0_enable_save_restore_machine(adev);
2739 	}
2740 
2741 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2742 			      AMD_PG_SUPPORT_GFX_SMG |
2743 			      AMD_PG_SUPPORT_GFX_DMG |
2744 			      AMD_PG_SUPPORT_CP |
2745 			      AMD_PG_SUPPORT_GDS |
2746 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2747 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2748 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2749 		gfx_v9_0_init_gfx_power_gating(adev);
2750 	}
2751 }
2752 
2753 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2754 {
2755 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2756 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2757 	gfx_v9_0_wait_for_rlc_serdes(adev);
2758 }
2759 
2760 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2761 {
2762 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2763 	udelay(50);
2764 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2765 	udelay(50);
2766 }
2767 
2768 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2769 {
2770 #ifdef AMDGPU_RLC_DEBUG_RETRY
2771 	u32 rlc_ucode_ver;
2772 #endif
2773 
2774 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2775 	udelay(50);
2776 
2777 	/* APUs (e.g. Carrizo) enable the CP interrupt after the CP is initialized */
2778 	if (!(adev->flags & AMD_IS_APU)) {
2779 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2780 		udelay(50);
2781 	}
2782 
2783 #ifdef AMDGPU_RLC_DEBUG_RETRY
2784 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2785 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2786 	if (rlc_ucode_ver == 0x108) {
2787 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2788 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2789 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2790 		 * default is 0x9C4 to create a 100us interval */
2791 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2792 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2793 		 * to disable the page fault retry interrupts, default is
2794 		 * 0x100 (256) */
2795 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2796 	}
2797 #endif
2798 }
2799 
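/* Legacy (non-PSP) RLC load: stream the RLC_G ucode words into
 * RLC_GPM_UCODE_DATA starting at RLCG_UCODE_LOADING_START_ADDRESS, then
 * write the firmware version to RLC_GPM_UCODE_ADDR to finish.
 */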
2800 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2801 {
2802 	const struct rlc_firmware_header_v2_0 *hdr;
2803 	const __le32 *fw_data;
2804 	unsigned i, fw_size;
2805 
2806 	if (!adev->gfx.rlc_fw)
2807 		return -EINVAL;
2808 
2809 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2810 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2811 
2812 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2813 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2814 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2815 
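	/* select the ucode load start address, stream the firmware one dword at a
	 * time through the data port, then write the firmware version back to the
	 * address register (apparently used as a load-complete marker)
	 */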
2816 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2817 			RLCG_UCODE_LOADING_START_ADDRESS);
2818 	for (i = 0; i < fw_size; i++)
2819 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2820 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2821 
2822 	return 0;
2823 }
2824 
2825 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2826 {
2827 	int r;
2828 
2829 	if (amdgpu_sriov_vf(adev)) {
2830 		gfx_v9_0_init_csb(adev);
2831 		return 0;
2832 	}
2833 
2834 	adev->gfx.rlc.funcs->stop(adev);
2835 
2836 	/* disable CG */
2837 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2838 
2839 	gfx_v9_0_init_pg(adev);
2840 
2841 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2842 		/* legacy rlc firmware loading */
2843 		r = gfx_v9_0_rlc_load_microcode(adev);
2844 		if (r)
2845 			return r;
2846 	}
2847 
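	/* amdgpu_lbpw: 0 disables LBPW and any other value enables it on Raven;
	 * Vega20 only enables it for an explicitly positive value, so the
	 * auto/default setting is presumably left disabled there
	 */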
2848 	switch (adev->asic_type) {
2849 	case CHIP_RAVEN:
2850 		if (amdgpu_lbpw == 0)
2851 			gfx_v9_0_enable_lbpw(adev, false);
2852 		else
2853 			gfx_v9_0_enable_lbpw(adev, true);
2854 		break;
2855 	case CHIP_VEGA20:
2856 		if (amdgpu_lbpw > 0)
2857 			gfx_v9_0_enable_lbpw(adev, true);
2858 		else
2859 			gfx_v9_0_enable_lbpw(adev, false);
2860 		break;
2861 	default:
2862 		break;
2863 	}
2864 
2865 	adev->gfx.rlc.funcs->start(adev);
2866 
2867 	return 0;
2868 }
2869 
2870 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2871 {
2872 	int i;
2873 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2874 
2875 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2876 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2877 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2878 	if (!enable) {
2879 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2880 			adev->gfx.gfx_ring[i].sched.ready = false;
2881 	}
2882 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2883 	udelay(50);
2884 }
2885 
2886 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2887 {
2888 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2889 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2890 	const struct gfx_firmware_header_v1_0 *me_hdr;
2891 	const __le32 *fw_data;
2892 	unsigned i, fw_size;
2893 
2894 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2895 		return -EINVAL;
2896 
2897 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2898 		adev->gfx.pfp_fw->data;
2899 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2900 		adev->gfx.ce_fw->data;
2901 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2902 		adev->gfx.me_fw->data;
2903 
2904 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2905 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2906 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2907 
2908 	gfx_v9_0_cp_gfx_enable(adev, false);
2909 
2910 	/* PFP */
2911 	fw_data = (const __le32 *)
2912 		(adev->gfx.pfp_fw->data +
2913 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2914 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2915 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2916 	for (i = 0; i < fw_size; i++)
2917 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2918 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2919 
2920 	/* CE */
2921 	fw_data = (const __le32 *)
2922 		(adev->gfx.ce_fw->data +
2923 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2924 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2925 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2926 	for (i = 0; i < fw_size; i++)
2927 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2928 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2929 
2930 	/* ME */
2931 	fw_data = (const __le32 *)
2932 		(adev->gfx.me_fw->data +
2933 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2934 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2935 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2936 	for (i = 0; i < fw_size; i++)
2937 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2938 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2939 
2940 	return 0;
2941 }
2942 
2943 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2944 {
2945 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2946 	const struct cs_section_def *sect = NULL;
2947 	const struct cs_extent_def *ext = NULL;
2948 	int r, i, tmp;
2949 
2950 	/* init the CP */
2951 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2952 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2953 
2954 	gfx_v9_0_cp_gfx_enable(adev, true);
2955 
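	/* the +4 and +3 account for the SET_BASE (4 dwords) and SET_UCONFIG_REG
	 * (3 dwords) packets emitted below, on top of the clear-state buffer
	 */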
2956 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2957 	if (r) {
2958 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2959 		return r;
2960 	}
2961 
2962 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2963 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2964 
2965 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2966 	amdgpu_ring_write(ring, 0x80000000);
2967 	amdgpu_ring_write(ring, 0x80000000);
2968 
2969 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2970 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2971 			if (sect->id == SECT_CONTEXT) {
2972 				amdgpu_ring_write(ring,
2973 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2974 					       ext->reg_count));
2975 				amdgpu_ring_write(ring,
2976 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2977 				for (i = 0; i < ext->reg_count; i++)
2978 					amdgpu_ring_write(ring, ext->extent[i]);
2979 			}
2980 		}
2981 	}
2982 
2983 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2984 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2985 
2986 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2987 	amdgpu_ring_write(ring, 0);
2988 
2989 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2990 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2991 	amdgpu_ring_write(ring, 0x8000);
2992 	amdgpu_ring_write(ring, 0x8000);
2993 
2994 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2995 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2996 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2997 	amdgpu_ring_write(ring, tmp);
2998 	amdgpu_ring_write(ring, 0);
2999 
3000 	amdgpu_ring_commit(ring);
3001 
3002 	return 0;
3003 }
3004 
3005 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3006 {
3007 	struct amdgpu_ring *ring;
3008 	u32 tmp;
3009 	u32 rb_bufsz;
3010 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3011 
3012 	/* Set the write pointer delay */
3013 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3014 
3015 	/* set the RB to use vmid 0 */
3016 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3017 
3018 	/* Set ring buffer size */
3019 	ring = &adev->gfx.gfx_ring[0];
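	/* ring_size is in bytes; order_base_2(ring_size / 8) equals
	 * log2(size in dwords) - 1, presumably the encoding RB_BUFSZ expects
	 */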
3020 	rb_bufsz = order_base_2(ring->ring_size / 8);
3021 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3022 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3023 #ifdef __BIG_ENDIAN
3024 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3025 #endif
3026 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3027 
3028 	/* Initialize the ring buffer's write pointers */
3029 	ring->wptr = 0;
3030 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3031 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3032 
3033 	/* set the wb address whether it's enabled or not */
3034 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3035 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3036 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3037 
3038 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3039 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3040 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3041 
3042 	mdelay(1);
3043 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3044 
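	/* the ring base address is programmed in 256-byte units, hence the >> 8 */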
3045 	rb_addr = ring->gpu_addr >> 8;
3046 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3047 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3048 
3049 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3050 	if (ring->use_doorbell) {
3051 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3052 				    DOORBELL_OFFSET, ring->doorbell_index);
3053 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3054 				    DOORBELL_EN, 1);
3055 	} else {
3056 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3057 	}
3058 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3059 
3060 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3061 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3062 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3063 
3064 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3065 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3066 
3067 
3068 	/* start the ring */
3069 	gfx_v9_0_cp_gfx_start(adev);
3070 	ring->sched.ready = true;
3071 
3072 	return 0;
3073 }
3074 
3075 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3076 {
3077 	int i;
3078 
3079 	if (enable) {
3080 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3081 	} else {
3082 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3083 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3084 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3085 			adev->gfx.compute_ring[i].sched.ready = false;
3086 		adev->gfx.kiq.ring.sched.ready = false;
3087 	}
3088 	udelay(50);
3089 }
3090 
3091 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3092 {
3093 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3094 	const __le32 *fw_data;
3095 	unsigned i;
3096 	u32 tmp;
3097 
3098 	if (!adev->gfx.mec_fw)
3099 		return -EINVAL;
3100 
3101 	gfx_v9_0_cp_compute_enable(adev, false);
3102 
3103 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3104 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3105 
3106 	fw_data = (const __le32 *)
3107 		(adev->gfx.mec_fw->data +
3108 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3109 	tmp = 0;
3110 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3111 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3112 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3113 
3114 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3115 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3116 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3117 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3118 
3119 	/* MEC1 */
3120 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3121 			 mec_hdr->jt_offset);
3122 	for (i = 0; i < mec_hdr->jt_size; i++)
3123 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3124 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3125 
3126 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3127 			adev->gfx.mec_fw_version);
3128 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3129 
3130 	return 0;
3131 }
3132 
3133 /* KIQ functions */
3134 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3135 {
3136 	uint32_t tmp;
3137 	struct amdgpu_device *adev = ring->adev;
3138 
3139 	/* tell the RLC which queue is the KIQ queue */
3140 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3141 	tmp &= 0xffffff00;
3142 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3143 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
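	/* queue lives in bits [2:0], pipe in [4:3], me in [6:5]; bit 7 is set in a
	 * second write, presumably to mark the KIQ entry valid
	 */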
3144 	tmp |= 0x80;
3145 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3146 }
3147 
3148 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3149 {
3150 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3151 	uint64_t queue_mask = 0;
3152 	int r, i;
3153 
3154 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3155 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3156 			continue;
3157 
3158 		/* This situation may be hit in the future if a new HW
3159 		 * generation exposes more than 64 queues. If so, the
3160 		 * definition of queue_mask needs updating */
3161 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3162 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3163 			break;
3164 		}
3165 
3166 		queue_mask |= (1ull << i);
3167 	}
3168 
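	/* 8 dwords for the SET_RESOURCES packet plus 7 dwords per MAP_QUEUES packet */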
3169 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3170 	if (r) {
3171 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3172 		return r;
3173 	}
3174 
3175 	/* set resources */
3176 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3177 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3178 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3179 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3180 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3181 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3182 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3183 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3184 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3185 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3186 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3187 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3188 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3189 
3190 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3191 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3192 		amdgpu_ring_write(kiq_ring,
3193 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3194 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3195 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3196 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3197 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3198 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3199 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3200 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3201 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3202 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3203 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3204 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3205 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3206 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3207 	}
3208 
3209 	r = amdgpu_ring_test_helper(kiq_ring);
3210 	if (r)
3211 		DRM_ERROR("KCQ enable failed\n");
3212 
3213 	return r;
3214 }
3215 
3216 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3217 {
3218 	struct amdgpu_device *adev = ring->adev;
3219 	struct v9_mqd *mqd = ring->mqd_ptr;
3220 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3221 	uint32_t tmp;
3222 
3223 	mqd->header = 0xC0310800;
3224 	mqd->compute_pipelinestat_enable = 0x00000001;
3225 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3226 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3227 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3228 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3229 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3230 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3231 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3232 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3233 	mqd->compute_misc_reserved = 0x00000003;
3234 
3235 	mqd->dynamic_cu_mask_addr_lo =
3236 		lower_32_bits(ring->mqd_gpu_addr
3237 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3238 	mqd->dynamic_cu_mask_addr_hi =
3239 		upper_32_bits(ring->mqd_gpu_addr
3240 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3241 
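	/* the EOP base address registers take a 256-byte aligned address, hence the >> 8 */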
3242 	eop_base_addr = ring->eop_gpu_addr >> 8;
3243 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3244 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3245 
3246 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
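	/* e.g. GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords:
	 * order_base_2(1024) - 1 = 9 and 2^(9+1) = 1024 dwords, matching the buffer size
	 */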
3247 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3248 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3249 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3250 
3251 	mqd->cp_hqd_eop_control = tmp;
3252 
3253 	/* enable doorbell? */
3254 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3255 
3256 	if (ring->use_doorbell) {
3257 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3258 				    DOORBELL_OFFSET, ring->doorbell_index);
3259 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3260 				    DOORBELL_EN, 1);
3261 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3262 				    DOORBELL_SOURCE, 0);
3263 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3264 				    DOORBELL_HIT, 0);
3265 	} else {
3266 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3267 					 DOORBELL_EN, 0);
3268 	}
3269 
3270 	mqd->cp_hqd_pq_doorbell_control = tmp;
3271 
3272 	/* disable the queue if it's active */
3273 	ring->wptr = 0;
3274 	mqd->cp_hqd_dequeue_request = 0;
3275 	mqd->cp_hqd_pq_rptr = 0;
3276 	mqd->cp_hqd_pq_wptr_lo = 0;
3277 	mqd->cp_hqd_pq_wptr_hi = 0;
3278 
3279 	/* set the pointer to the MQD */
3280 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3281 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3282 
3283 	/* set MQD vmid to 0 */
3284 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3285 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3286 	mqd->cp_mqd_control = tmp;
3287 
3288 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3289 	hqd_gpu_addr = ring->gpu_addr >> 8;
3290 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3291 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3292 
3293 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3294 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3295 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3296 			    (order_base_2(ring->ring_size / 4) - 1));
3297 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3298 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3299 #ifdef __BIG_ENDIAN
3300 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3301 #endif
3302 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3303 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3304 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3305 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3306 	mqd->cp_hqd_pq_control = tmp;
3307 
3308 	/* set the wb address whether it's enabled or not */
3309 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3310 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3311 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3312 		upper_32_bits(wb_gpu_addr) & 0xffff;
3313 
3314 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3315 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3316 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3317 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3318 
3319 	tmp = 0;
3320 	/* enable the doorbell if requested */
3321 	if (ring->use_doorbell) {
3322 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3323 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3324 				DOORBELL_OFFSET, ring->doorbell_index);
3325 
3326 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3327 					 DOORBELL_EN, 1);
3328 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3329 					 DOORBELL_SOURCE, 0);
3330 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3331 					 DOORBELL_HIT, 0);
3332 	}
3333 
3334 	mqd->cp_hqd_pq_doorbell_control = tmp;
3335 
3336 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3337 	ring->wptr = 0;
3338 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3339 
3340 	/* set the vmid for the queue */
3341 	mqd->cp_hqd_vmid = 0;
3342 
3343 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3344 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3345 	mqd->cp_hqd_persistent_state = tmp;
3346 
3347 	/* set MIN_IB_AVAIL_SIZE */
3348 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3349 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3350 	mqd->cp_hqd_ib_control = tmp;
3351 
3352 	/* activate the queue */
3353 	mqd->cp_hqd_active = 1;
3354 
3355 	return 0;
3356 }
3357 
3358 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3359 {
3360 	struct amdgpu_device *adev = ring->adev;
3361 	struct v9_mqd *mqd = ring->mqd_ptr;
3362 	int j;
3363 
3364 	/* disable wptr polling */
3365 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3366 
3367 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3368 	       mqd->cp_hqd_eop_base_addr_lo);
3369 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3370 	       mqd->cp_hqd_eop_base_addr_hi);
3371 
3372 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3373 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3374 	       mqd->cp_hqd_eop_control);
3375 
3376 	/* enable doorbell? */
3377 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3378 	       mqd->cp_hqd_pq_doorbell_control);
3379 
3380 	/* disable the queue if it's active */
3381 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3382 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3383 		for (j = 0; j < adev->usec_timeout; j++) {
3384 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3385 				break;
3386 			udelay(1);
3387 		}
3388 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3389 		       mqd->cp_hqd_dequeue_request);
3390 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3391 		       mqd->cp_hqd_pq_rptr);
3392 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3393 		       mqd->cp_hqd_pq_wptr_lo);
3394 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3395 		       mqd->cp_hqd_pq_wptr_hi);
3396 	}
3397 
3398 	/* set the pointer to the MQD */
3399 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3400 	       mqd->cp_mqd_base_addr_lo);
3401 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3402 	       mqd->cp_mqd_base_addr_hi);
3403 
3404 	/* set MQD vmid to 0 */
3405 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3406 	       mqd->cp_mqd_control);
3407 
3408 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3409 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3410 	       mqd->cp_hqd_pq_base_lo);
3411 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3412 	       mqd->cp_hqd_pq_base_hi);
3413 
3414 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3415 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3416 	       mqd->cp_hqd_pq_control);
3417 
3418 	/* set the wb address whether it's enabled or not */
3419 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3420 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3421 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3422 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3423 
3424 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3425 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3426 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3427 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3428 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3429 
3430 	/* enable the doorbell if requested */
3431 	if (ring->use_doorbell) {
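		/* the doorbell index is converted to a byte offset: * 2 presumably
		 * because 64-bit doorbells occupy two dword slots, << 2 for 4 bytes
		 * per dword
		 */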
3432 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3433 					(adev->doorbell_index.kiq * 2) << 2);
3434 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3435 					(adev->doorbell_index.userqueue_end * 2) << 2);
3436 	}
3437 
3438 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3439 	       mqd->cp_hqd_pq_doorbell_control);
3440 
3441 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3442 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3443 	       mqd->cp_hqd_pq_wptr_lo);
3444 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3445 	       mqd->cp_hqd_pq_wptr_hi);
3446 
3447 	/* set the vmid for the queue */
3448 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3449 
3450 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3451 	       mqd->cp_hqd_persistent_state);
3452 
3453 	/* activate the queue */
3454 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3455 	       mqd->cp_hqd_active);
3456 
3457 	if (ring->use_doorbell)
3458 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3459 
3460 	return 0;
3461 }
3462 
3463 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3464 {
3465 	struct amdgpu_device *adev = ring->adev;
3466 	int j;
3467 
3468 	/* disable the queue if it's active */
3469 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3470 
3471 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3472 
3473 		for (j = 0; j < adev->usec_timeout; j++) {
3474 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3475 				break;
3476 			udelay(1);
3477 		}
3478 
3479 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3480 			DRM_DEBUG("KIQ dequeue request failed.\n");
3481 
3482 			/* Manually disable the queue if the dequeue request times out */
3483 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3484 		}
3485 
3486 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3487 		      0);
3488 	}
3489 
3490 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3491 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3492 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3493 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3494 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3495 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3496 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3497 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3498 
3499 	return 0;
3500 }
3501 
3502 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3503 {
3504 	struct amdgpu_device *adev = ring->adev;
3505 	struct v9_mqd *mqd = ring->mqd_ptr;
3506 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3507 
3508 	gfx_v9_0_kiq_setting(ring);
3509 
3510 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3511 		/* reset MQD to a clean status */
3512 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3513 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3514 
3515 		/* reset ring buffer */
3516 		ring->wptr = 0;
3517 		amdgpu_ring_clear_ring(ring);
3518 
3519 		mutex_lock(&adev->srbm_mutex);
3520 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3521 		gfx_v9_0_kiq_init_register(ring);
3522 		soc15_grbm_select(adev, 0, 0, 0, 0);
3523 		mutex_unlock(&adev->srbm_mutex);
3524 	} else {
3525 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3526 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3527 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3528 		mutex_lock(&adev->srbm_mutex);
3529 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3530 		gfx_v9_0_mqd_init(ring);
3531 		gfx_v9_0_kiq_init_register(ring);
3532 		soc15_grbm_select(adev, 0, 0, 0, 0);
3533 		mutex_unlock(&adev->srbm_mutex);
3534 
3535 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3536 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3537 	}
3538 
3539 	return 0;
3540 }
3541 
3542 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3543 {
3544 	struct amdgpu_device *adev = ring->adev;
3545 	struct v9_mqd *mqd = ring->mqd_ptr;
3546 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3547 
3548 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3549 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3550 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3551 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3552 		mutex_lock(&adev->srbm_mutex);
3553 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3554 		gfx_v9_0_mqd_init(ring);
3555 		soc15_grbm_select(adev, 0, 0, 0, 0);
3556 		mutex_unlock(&adev->srbm_mutex);
3557 
3558 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3559 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3560 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3561 		/* reset MQD to a clean status */
3562 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3563 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3564 
3565 		/* reset ring buffer */
3566 		ring->wptr = 0;
3567 		amdgpu_ring_clear_ring(ring);
3568 	} else {
3569 		amdgpu_ring_clear_ring(ring);
3570 	}
3571 
3572 	return 0;
3573 }
3574 
3575 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3576 {
3577 	struct amdgpu_ring *ring;
3578 	int r;
3579 
3580 	ring = &adev->gfx.kiq.ring;
3581 
3582 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3583 	if (unlikely(r != 0))
3584 		return r;
3585 
3586 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3587 	if (unlikely(r != 0))
3588 		return r;
3589 
3590 	gfx_v9_0_kiq_init_queue(ring);
3591 	amdgpu_bo_kunmap(ring->mqd_obj);
3592 	ring->mqd_ptr = NULL;
3593 	amdgpu_bo_unreserve(ring->mqd_obj);
3594 	ring->sched.ready = true;
3595 	return 0;
3596 }
3597 
3598 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3599 {
3600 	struct amdgpu_ring *ring = NULL;
3601 	int r = 0, i;
3602 
3603 	gfx_v9_0_cp_compute_enable(adev, true);
3604 
3605 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3606 		ring = &adev->gfx.compute_ring[i];
3607 
3608 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3609 		if (unlikely(r != 0))
3610 			goto done;
3611 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3612 		if (!r) {
3613 			r = gfx_v9_0_kcq_init_queue(ring);
3614 			amdgpu_bo_kunmap(ring->mqd_obj);
3615 			ring->mqd_ptr = NULL;
3616 		}
3617 		amdgpu_bo_unreserve(ring->mqd_obj);
3618 		if (r)
3619 			goto done;
3620 	}
3621 
3622 	r = gfx_v9_0_kiq_kcq_enable(adev);
3623 done:
3624 	return r;
3625 }
3626 
3627 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3628 {
3629 	int r, i;
3630 	struct amdgpu_ring *ring;
3631 
3632 	if (!(adev->flags & AMD_IS_APU))
3633 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3634 
3635 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3636 		if (adev->asic_type != CHIP_ARCTURUS) {
3637 			/* legacy firmware loading */
3638 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3639 			if (r)
3640 				return r;
3641 		}
3642 
3643 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3644 		if (r)
3645 			return r;
3646 	}
3647 
3648 	r = gfx_v9_0_kiq_resume(adev);
3649 	if (r)
3650 		return r;
3651 
3652 	if (adev->asic_type != CHIP_ARCTURUS) {
3653 		r = gfx_v9_0_cp_gfx_resume(adev);
3654 		if (r)
3655 			return r;
3656 	}
3657 
3658 	r = gfx_v9_0_kcq_resume(adev);
3659 	if (r)
3660 		return r;
3661 
3662 	if (adev->asic_type != CHIP_ARCTURUS) {
3663 		ring = &adev->gfx.gfx_ring[0];
3664 		r = amdgpu_ring_test_helper(ring);
3665 		if (r)
3666 			return r;
3667 	}
3668 
3669 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3670 		ring = &adev->gfx.compute_ring[i];
3671 		amdgpu_ring_test_helper(ring);
3672 	}
3673 
3674 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3675 
3676 	return 0;
3677 }
3678 
3679 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3680 {
3681 	if (adev->asic_type != CHIP_ARCTURUS)
3682 		gfx_v9_0_cp_gfx_enable(adev, enable);
3683 	gfx_v9_0_cp_compute_enable(adev, enable);
3684 }
3685 
3686 static int gfx_v9_0_hw_init(void *handle)
3687 {
3688 	int r;
3689 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3690 
3691 	if (!amdgpu_sriov_vf(adev))
3692 		gfx_v9_0_init_golden_registers(adev);
3693 
3694 	gfx_v9_0_constants_init(adev);
3695 
3696 	r = gfx_v9_0_csb_vram_pin(adev);
3697 	if (r)
3698 		return r;
3699 
3700 	r = adev->gfx.rlc.funcs->resume(adev);
3701 	if (r)
3702 		return r;
3703 
3704 	r = gfx_v9_0_cp_resume(adev);
3705 	if (r)
3706 		return r;
3707 
3708 	return r;
3709 }
3710 
3711 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3712 {
3713 	int r, i;
3714 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3715 
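	/* each UNMAP_QUEUES packet below is 6 dwords (header plus 5 payload dwords) */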
3716 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3717 	if (r)
3718 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3719 
3720 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3721 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3722 
3723 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3724 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3725 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3726 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3727 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3728 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3729 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3730 		amdgpu_ring_write(kiq_ring, 0);
3731 		amdgpu_ring_write(kiq_ring, 0);
3732 		amdgpu_ring_write(kiq_ring, 0);
3733 	}
3734 	r = amdgpu_ring_test_helper(kiq_ring);
3735 	if (r)
3736 		DRM_ERROR("KCQ disable failed\n");
3737 
3738 	return r;
3739 }
3740 
3741 static int gfx_v9_0_hw_fini(void *handle)
3742 {
3743 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3744 
3745 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3746 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3747 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3748 
3749 	/* when a RAS fatal interrupt has triggered, DF is frozen and the kcq disable would fail */
3750 	if (!amdgpu_ras_intr_triggered())
3751 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3752 		gfx_v9_0_kcq_disable(adev);
3753 
3754 	if (amdgpu_sriov_vf(adev)) {
3755 		gfx_v9_0_cp_gfx_enable(adev, false);
3756 		/* must disable wptr polling for SRIOV once the hw is finished,
3757 		 * otherwise the CPC engine may keep fetching a WB address that is
3758 		 * no longer valid after sw teardown and trigger a DMAR read error
3759 		 * on the hypervisor side.
3760 		 */
3761 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3762 		return 0;
3763 	}
3764 
3765 	/* Use the deinitialize sequence from CAIL when unbinding the device
3766 	 * from the driver, otherwise the KIQ hangs when binding it back.
3767 	 */
3768 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3769 		mutex_lock(&adev->srbm_mutex);
3770 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3771 				adev->gfx.kiq.ring.pipe,
3772 				adev->gfx.kiq.ring.queue, 0);
3773 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3774 		soc15_grbm_select(adev, 0, 0, 0, 0);
3775 		mutex_unlock(&adev->srbm_mutex);
3776 	}
3777 
3778 	gfx_v9_0_cp_enable(adev, false);
3779 	adev->gfx.rlc.funcs->stop(adev);
3780 
3781 	gfx_v9_0_csb_vram_unpin(adev);
3782 
3783 	return 0;
3784 }
3785 
3786 static int gfx_v9_0_suspend(void *handle)
3787 {
3788 	return gfx_v9_0_hw_fini(handle);
3789 }
3790 
3791 static int gfx_v9_0_resume(void *handle)
3792 {
3793 	return gfx_v9_0_hw_init(handle);
3794 }
3795 
3796 static bool gfx_v9_0_is_idle(void *handle)
3797 {
3798 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3799 
3800 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3801 				GRBM_STATUS, GUI_ACTIVE))
3802 		return false;
3803 	else
3804 		return true;
3805 }
3806 
3807 static int gfx_v9_0_wait_for_idle(void *handle)
3808 {
3809 	unsigned i;
3810 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3811 
3812 	for (i = 0; i < adev->usec_timeout; i++) {
3813 		if (gfx_v9_0_is_idle(handle))
3814 			return 0;
3815 		udelay(1);
3816 	}
3817 	return -ETIMEDOUT;
3818 }
3819 
3820 static int gfx_v9_0_soft_reset(void *handle)
3821 {
3822 	u32 grbm_soft_reset = 0;
3823 	u32 tmp;
3824 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3825 
3826 	/* GRBM_STATUS */
3827 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3828 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3829 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3830 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3831 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3832 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3833 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3834 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3835 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3836 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3837 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3838 	}
3839 
3840 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3841 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843 	}
3844 
3845 	/* GRBM_STATUS2 */
3846 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3847 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3848 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3850 
3851 
3852 	if (grbm_soft_reset) {
3853 		/* stop the rlc */
3854 		adev->gfx.rlc.funcs->stop(adev);
3855 
3856 		if (adev->asic_type != CHIP_ARCTURUS)
3857 			/* Disable GFX parsing/prefetching */
3858 			gfx_v9_0_cp_gfx_enable(adev, false);
3859 
3860 		/* Disable MEC parsing/prefetching */
3861 		gfx_v9_0_cp_compute_enable(adev, false);
3862 
3863 		if (grbm_soft_reset) {
3864 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3865 			tmp |= grbm_soft_reset;
3866 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3867 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3868 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3869 
3870 			udelay(50);
3871 
3872 			tmp &= ~grbm_soft_reset;
3873 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3874 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3875 		}
3876 
3877 		/* Wait a little for things to settle down */
3878 		udelay(50);
3879 	}
3880 	return 0;
3881 }
3882 
3883 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3884 {
3885 	uint64_t clock;
3886 
3887 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3888 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3889 		uint32_t tmp, lsb, msb, i = 0;
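		/* read MSB, then LSB, then MSB again and retry if it changed, so a
		 * carry between the two 32-bit reads cannot yield a torn 64-bit value
		 */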
3890 		do {
3891 			if (i != 0)
3892 				udelay(1);
3893 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3894 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3895 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3896 			i++;
3897 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3898 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3899 	} else {
3900 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3901 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3902 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3903 	}
3904 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3905 	return clock;
3906 }
3907 
3908 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3909 					  uint32_t vmid,
3910 					  uint32_t gds_base, uint32_t gds_size,
3911 					  uint32_t gws_base, uint32_t gws_size,
3912 					  uint32_t oa_base, uint32_t oa_size)
3913 {
3914 	struct amdgpu_device *adev = ring->adev;
3915 
3916 	/* GDS Base */
3917 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3918 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3919 				   gds_base);
3920 
3921 	/* GDS Size */
3922 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3923 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3924 				   gds_size);
3925 
3926 	/* GWS */
3927 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3928 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3929 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3930 
3931 	/* OA */
3932 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3933 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3934 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3935 }
3936 
3937 static const u32 vgpr_init_compute_shader[] =
3938 {
3939 	0xb07c0000, 0xbe8000ff,
3940 	0x000000f8, 0xbf110800,
3941 	0x7e000280, 0x7e020280,
3942 	0x7e040280, 0x7e060280,
3943 	0x7e080280, 0x7e0a0280,
3944 	0x7e0c0280, 0x7e0e0280,
3945 	0x80808800, 0xbe803200,
3946 	0xbf84fff5, 0xbf9c0000,
3947 	0xd28c0001, 0x0001007f,
3948 	0xd28d0001, 0x0002027e,
3949 	0x10020288, 0xb8810904,
3950 	0xb7814000, 0xd1196a01,
3951 	0x00000301, 0xbe800087,
3952 	0xbefc00c1, 0xd89c4000,
3953 	0x00020201, 0xd89cc080,
3954 	0x00040401, 0x320202ff,
3955 	0x00000800, 0x80808100,
3956 	0xbf84fff8, 0x7e020280,
3957 	0xbf810000, 0x00000000,
3958 };
3959 
3960 static const u32 sgpr_init_compute_shader[] =
3961 {
3962 	0xb07c0000, 0xbe8000ff,
3963 	0x0000005f, 0xbee50080,
3964 	0xbe812c65, 0xbe822c65,
3965 	0xbe832c65, 0xbe842c65,
3966 	0xbe852c65, 0xb77c0005,
3967 	0x80808500, 0xbf84fff8,
3968 	0xbe800080, 0xbf810000,
3969 };
3970 
3971 static const struct soc15_reg_entry vgpr_init_regs[] = {
3972    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3973    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3974    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3975    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3976    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3977    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3978    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3979    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3980    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3981    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3982 };
3983 
3984 static const struct soc15_reg_entry sgpr_init_regs[] = {
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3989    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3990    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3991    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3995 };
3996 
3997 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3998    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3999    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4000    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4001    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4002    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4003    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4004    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4005    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4006    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4007    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4008    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4009    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4010    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4011    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4012    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4013    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4014    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4015    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4016    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4017    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4018    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4019    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4020    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4021    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4022    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4023    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4024    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4025    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4026    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4027    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4028    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4029    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4030    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4031 };
4032 
4033 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4034 {
4035 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4036 	int i, r;
4037 
4038 	/* only support when RAS is enabled */
4039 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4040 		return 0;
4041 
4042 	r = amdgpu_ring_alloc(ring, 7);
4043 	if (r) {
4044 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4045 			ring->name, r);
4046 		return r;
4047 	}
4048 
4049 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4050 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4051 
4052 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4053 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4054 				PACKET3_DMA_DATA_DST_SEL(1) |
4055 				PACKET3_DMA_DATA_SRC_SEL(2) |
4056 				PACKET3_DMA_DATA_ENGINE(0)));
4057 	amdgpu_ring_write(ring, 0);
4058 	amdgpu_ring_write(ring, 0);
4059 	amdgpu_ring_write(ring, 0);
4060 	amdgpu_ring_write(ring, 0);
4061 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4062 				adev->gds.gds_size);
4063 
4064 	amdgpu_ring_commit(ring);
4065 
4066 	for (i = 0; i < adev->usec_timeout; i++) {
4067 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4068 			break;
4069 		udelay(1);
4070 	}
4071 
4072 	if (i >= adev->usec_timeout)
4073 		r = -ETIMEDOUT;
4074 
4075 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4076 
4077 	return r;
4078 }
4079 
4080 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4081 {
4082 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4083 	struct amdgpu_ib ib;
4084 	struct dma_fence *f = NULL;
4085 	int r, i, j, k;
4086 	unsigned total_size, vgpr_offset, sgpr_offset;
4087 	u64 gpu_addr;
4088 
4089 	/* only support when RAS is enabled */
4090 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4091 		return 0;
4092 
4093 	/* bail if the compute ring is not ready */
4094 	if (!ring->sched.ready)
4095 		return 0;
4096 
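	/* per dispatch: 3 dwords per SET_SH_REG register write, 4 for the shader
	 * address, 5 for the DISPATCH_DIRECT packet and 2 for the CS partial
	 * flush event; the trailing * 4 converts dwords to bytes
	 */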
4097 	total_size =
4098 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4099 	total_size +=
4100 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4101 	total_size = ALIGN(total_size, 256);
4102 	vgpr_offset = total_size;
4103 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4104 	sgpr_offset = total_size;
4105 	total_size += sizeof(sgpr_init_compute_shader);
4106 
4107 	/* allocate an indirect buffer to put the commands in */
4108 	memset(&ib, 0, sizeof(ib));
4109 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4110 	if (r) {
4111 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4112 		return r;
4113 	}
4114 
4115 	/* load the compute shaders */
4116 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4117 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4118 
4119 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4120 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4121 
4122 	/* init the ib length to 0 */
4123 	ib.length_dw = 0;
4124 
4125 	/* VGPR */
4126 	/* write the register state for the compute dispatch */
4127 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4128 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4129 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4130 								- PACKET3_SET_SH_REG_START;
4131 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4132 	}
4133 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
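	/* the shader start address is programmed in 256-byte units (gpu_addr >> 8) */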
4134 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4135 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4136 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4137 							- PACKET3_SET_SH_REG_START;
4138 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4139 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4140 
4141 	/* write dispatch packet */
4142 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4143 	ib.ptr[ib.length_dw++] = 128; /* x */
4144 	ib.ptr[ib.length_dw++] = 1; /* y */
4145 	ib.ptr[ib.length_dw++] = 1; /* z */
4146 	ib.ptr[ib.length_dw++] =
4147 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4148 
4149 	/* write CS partial flush packet */
4150 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4151 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4152 
4153 	/* SGPR */
4154 	/* write the register state for the compute dispatch */
4155 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4156 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4157 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4158 								- PACKET3_SET_SH_REG_START;
4159 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4160 	}
4161 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4162 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4163 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4164 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4165 							- PACKET3_SET_SH_REG_START;
4166 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4167 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4168 
4169 	/* write dispatch packet */
4170 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4171 	ib.ptr[ib.length_dw++] = 128; /* x */
4172 	ib.ptr[ib.length_dw++] = 1; /* y */
4173 	ib.ptr[ib.length_dw++] = 1; /* z */
4174 	ib.ptr[ib.length_dw++] =
4175 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4176 
4177 	/* write CS partial flush packet */
4178 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4179 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4180 
4181 	/* schedule the ib on the ring */
4182 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4183 	if (r) {
4184 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4185 		goto fail;
4186 	}
4187 
4188 	/* wait for the GPU to finish processing the IB */
4189 	r = dma_fence_wait(f, false);
4190 	if (r) {
4191 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4192 		goto fail;
4193 	}
4194 
4195 	/* read back registers to clear the counters */
4196 	mutex_lock(&adev->grbm_idx_mutex);
4197 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4198 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4199 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4200 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4201 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4202 			}
4203 		}
4204 	}
4205 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4206 	mutex_unlock(&adev->grbm_idx_mutex);
4207 
4208 fail:
4209 	amdgpu_ib_free(adev, &ib, NULL);
4210 	dma_fence_put(f);
4211 
4212 	return r;
4213 }
4214 
4215 static int gfx_v9_0_early_init(void *handle)
4216 {
4217 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4218 
4219 	if (adev->asic_type == CHIP_ARCTURUS)
4220 		adev->gfx.num_gfx_rings = 0;
4221 	else
4222 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4223 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4224 	gfx_v9_0_set_ring_funcs(adev);
4225 	gfx_v9_0_set_irq_funcs(adev);
4226 	gfx_v9_0_set_gds_init(adev);
4227 	gfx_v9_0_set_rlc_funcs(adev);
4228 
4229 	return 0;
4230 }
4231 
4232 static int gfx_v9_0_ecc_late_init(void *handle)
4233 {
4234 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4235 	int r;
4236 
4237 	r = amdgpu_gfx_ras_late_init(adev);
4238 	if (r)
4239 		return r;
4240 
4241 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4242 	if (r)
4243 		return r;
4244 
4245 	/* requires IBs so do in late init after IB pool is initialized */
4246 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4247 	if (r)
4248 		return r;
4249 
4250 	return 0;
4251 }
4252 
4253 static int gfx_v9_0_late_init(void *handle)
4254 {
4255 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4256 	int r;
4257 
4258 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4259 	if (r)
4260 		return r;
4261 
4262 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4263 	if (r)
4264 		return r;
4265 
4266 	r = gfx_v9_0_ecc_late_init(handle);
4267 	if (r)
4268 		return r;
4269 
4270 	return 0;
4271 }
4272 
4273 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4274 {
4275 	uint32_t rlc_setting;
4276 
4277 	/* if RLC is not enabled, do nothing */
4278 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4279 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4280 		return false;
4281 
4282 	return true;
4283 }
4284 
4285 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4286 {
4287 	uint32_t data;
4288 	unsigned i;
4289 
4290 	data = RLC_SAFE_MODE__CMD_MASK;
4291 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4292 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4293 
4294 	/* wait for RLC_SAFE_MODE */
4295 	for (i = 0; i < adev->usec_timeout; i++) {
4296 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4297 			break;
4298 		udelay(1);
4299 	}
4300 }
4301 
4302 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4303 {
4304 	uint32_t data;
4305 
4306 	data = RLC_SAFE_MODE__CMD_MASK;
4307 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4308 }
4309 
4310 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4311 						bool enable)
4312 {
4313 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4314 
4315 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4316 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4317 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4318 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4319 	} else {
4320 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4321 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4322 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4323 	}
4324 
4325 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4326 }
4327 
4328 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4329 						bool enable)
4330 {
4331 	/* TODO: double check if we need to perform under safe mode */
4332 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4333 
4334 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4335 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4336 	else
4337 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4338 
4339 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4340 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4341 	else
4342 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4343 
4344 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4345 }
4346 
4347 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4348 						      bool enable)
4349 {
4350 	uint32_t data, def;
4351 
4352 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4353 
4354 	/* It is disabled by HW by default */
4355 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4356 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4357 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4358 
4359 		if (adev->asic_type != CHIP_VEGA12)
4360 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4361 
4362 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4363 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4364 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4365 
4366 		/* only for Vega10 & Raven1 */
4367 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4368 
4369 		if (def != data)
4370 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4371 
4372 		/* MGLS is a global flag to control all MGLS in GFX */
4373 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4374 			/* 2 - RLC memory Light sleep */
4375 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4376 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4377 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4378 				if (def != data)
4379 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4380 			}
4381 			/* 3 - CP memory Light sleep */
4382 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4383 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4384 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4385 				if (def != data)
4386 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4387 			}
4388 		}
4389 	} else {
4390 		/* 1 - MGCG_OVERRIDE */
4391 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4392 
4393 		if (adev->asic_type != CHIP_VEGA12)
4394 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4395 
4396 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4397 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4398 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4399 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4400 
4401 		if (def != data)
4402 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4403 
4404 		/* 2 - disable MGLS in RLC */
4405 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4406 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4407 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4408 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4409 		}
4410 
4411 		/* 3 - disable MGLS in CP */
4412 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4413 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4414 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4415 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4416 		}
4417 	}
4418 
4419 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4420 }
4421 
4422 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4423 					   bool enable)
4424 {
4425 	uint32_t data, def;
4426 
4427 	if (adev->asic_type == CHIP_ARCTURUS)
4428 		return;
4429 
4430 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4431 
4432 	/* Enable 3D CGCG/CGLS */
4433 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4434 		/* write cmd to clear cgcg/cgls ov */
4435 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4436 		/* unset CGCG override */
4437 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4438 		/* update CGCG and CGLS override bits */
4439 		if (def != data)
4440 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4441 
4442 		/* enable 3Dcgcg FSM(0x0000363f) */
4443 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4444 
4445 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4446 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4447 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4448 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4449 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4450 		if (def != data)
4451 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4452 
4453 		/* set IDLE_POLL_COUNT(0x00900100) */
4454 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4455 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4456 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4457 		if (def != data)
4458 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4459 	} else {
4460 		/* Disable CGCG/CGLS */
4461 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4462 		/* disable cgcg, cgls should be disabled */
4463 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4464 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4465 		/* disable cgcg and cgls in FSM */
4466 		if (def != data)
4467 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4468 	}
4469 
4470 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4471 }
4472 
4473 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4474 						      bool enable)
4475 {
4476 	uint32_t def, data;
4477 
4478 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4479 
4480 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4481 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4482 		/* unset CGCG override */
4483 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4484 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4485 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4486 		else
4487 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4488 		/* update CGCG and CGLS override bits */
4489 		if (def != data)
4490 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4491 
4492 		/* enable cgcg FSM(0x0000363F) */
4493 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4494 
4495 		if (adev->asic_type == CHIP_ARCTURUS)
4496 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4497 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4498 		else
4499 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4500 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4501 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4502 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4503 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4504 		if (def != data)
4505 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4506 
4507 		/* set IDLE_POLL_COUNT(0x00900100) */
4508 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4509 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4510 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4511 		if (def != data)
4512 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4513 	} else {
4514 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4515 		/* reset CGCG/CGLS bits */
4516 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4517 		/* disable cgcg and cgls in FSM */
4518 		if (def != data)
4519 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4520 	}
4521 
4522 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4523 }
4524 
4525 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4526 					    bool enable)
4527 {
4528 	if (enable) {
4529 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4530 		 * ===  MGCG + MGLS ===
4531 		 */
4532 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4533 		/* ===  CGCG /CGLS for GFX 3D Only === */
4534 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4535 		/* ===  CGCG + CGLS === */
4536 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4537 	} else {
4538 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4539 		 * ===  CGCG + CGLS ===
4540 		 */
4541 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4542 		/* ===  CGCG /CGLS for GFX 3D Only === */
4543 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4544 		/* ===  MGCG + MGLS === */
4545 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4546 	}
4547 	return 0;
4548 }
4549 
4550 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4551 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4552 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4553 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4554 	.init = gfx_v9_0_rlc_init,
4555 	.get_csb_size = gfx_v9_0_get_csb_size,
4556 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4557 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4558 	.resume = gfx_v9_0_rlc_resume,
4559 	.stop = gfx_v9_0_rlc_stop,
4560 	.reset = gfx_v9_0_rlc_reset,
4561 	.start = gfx_v9_0_rlc_start
4562 };
4563 
4564 static int gfx_v9_0_set_powergating_state(void *handle,
4565 					  enum amd_powergating_state state)
4566 {
4567 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4568 	bool enable = (state == AMD_PG_STATE_GATE);
4569 
4570 	switch (adev->asic_type) {
4571 	case CHIP_RAVEN:
4572 	case CHIP_RENOIR:
4573 		if (!enable) {
4574 			amdgpu_gfx_off_ctrl(adev, false);
4575 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4576 		}
4577 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4578 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4579 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4580 		} else {
4581 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4582 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4583 		}
4584 
4585 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4586 			gfx_v9_0_enable_cp_power_gating(adev, true);
4587 		else
4588 			gfx_v9_0_enable_cp_power_gating(adev, false);
4589 
4590 		/* update gfx cgpg state */
4591 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4592 
4593 		/* update mgcg state */
4594 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4595 
4596 		if (enable)
4597 			amdgpu_gfx_off_ctrl(adev, true);
4598 		break;
4599 	case CHIP_VEGA12:
4600 		if (!enable) {
4601 			amdgpu_gfx_off_ctrl(adev, false);
4602 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4603 		} else {
4604 			amdgpu_gfx_off_ctrl(adev, true);
4605 		}
4606 		break;
4607 	default:
4608 		break;
4609 	}
4610 
4611 	return 0;
4612 }
4613 
4614 static int gfx_v9_0_set_clockgating_state(void *handle,
4615 					  enum amd_clockgating_state state)
4616 {
4617 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4618 
4619 	if (amdgpu_sriov_vf(adev))
4620 		return 0;
4621 
4622 	switch (adev->asic_type) {
4623 	case CHIP_VEGA10:
4624 	case CHIP_VEGA12:
4625 	case CHIP_VEGA20:
4626 	case CHIP_RAVEN:
4627 	case CHIP_ARCTURUS:
4628 	case CHIP_RENOIR:
4629 		gfx_v9_0_update_gfx_clock_gating(adev,
4630 						 state == AMD_CG_STATE_GATE);
4631 		break;
4632 	default:
4633 		break;
4634 	}
4635 	return 0;
4636 }
4637 
4638 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4639 {
4640 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4641 	int data;
4642 
4643 	if (amdgpu_sriov_vf(adev))
4644 		*flags = 0;
4645 
4646 	/* AMD_CG_SUPPORT_GFX_MGCG */
4647 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4648 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4649 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4650 
4651 	/* AMD_CG_SUPPORT_GFX_CGCG */
4652 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4653 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4654 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4655 
4656 	/* AMD_CG_SUPPORT_GFX_CGLS */
4657 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4658 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4659 
4660 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4661 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4662 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4663 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4664 
4665 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4666 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4667 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4668 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4669 
4670 	if (adev->asic_type != CHIP_ARCTURUS) {
4671 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4672 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4673 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4674 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4675 
4676 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4677 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4678 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4679 	}
4680 }
4681 
4682 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4683 {
4684 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4685 }
4686 
4687 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4688 {
4689 	struct amdgpu_device *adev = ring->adev;
4690 	u64 wptr;
4691 
4692 	/* XXX check if swapping is necessary on BE */
4693 	if (ring->use_doorbell) {
4694 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4695 	} else {
4696 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4697 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4698 	}
4699 
4700 	return wptr;
4701 }
4702 
4703 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4704 {
4705 	struct amdgpu_device *adev = ring->adev;
4706 
4707 	if (ring->use_doorbell) {
4708 		/* XXX check if swapping is necessary on BE */
4709 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4710 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4711 	} else {
4712 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4713 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4714 	}
4715 }
4716 
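/* Emit an HDP flush for this ring: a WAIT_REG_MEM against the NBIO HDP flush
 * request/done registers. Compute rings derive their ref_and_mask bit from
 * ME/pipe; the gfx ring uses the CP0 bit and the PFP engine.
 */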
4717 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4718 {
4719 	struct amdgpu_device *adev = ring->adev;
4720 	u32 ref_and_mask, reg_mem_engine;
4721 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4722 
4723 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4724 		switch (ring->me) {
4725 		case 1:
4726 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4727 			break;
4728 		case 2:
4729 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4730 			break;
4731 		default:
4732 			return;
4733 		}
4734 		reg_mem_engine = 0;
4735 	} else {
4736 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4737 		reg_mem_engine = 1; /* pfp */
4738 	}
4739 
4740 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4741 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4742 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4743 			      ref_and_mask, ref_and_mask, 0x20);
4744 }
4745 
4746 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4747 					struct amdgpu_job *job,
4748 					struct amdgpu_ib *ib,
4749 					uint32_t flags)
4750 {
4751 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4752 	u32 header, control = 0;
4753 
4754 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4755 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4756 	else
4757 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4758 
4759 	control |= ib->length_dw | (vmid << 24);
4760 
4761 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4762 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4763 
4764 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4765 			gfx_v9_0_ring_emit_de_meta(ring);
4766 	}
4767 
4768 	amdgpu_ring_write(ring, header);
4769 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4770 	amdgpu_ring_write(ring,
4771 #ifdef __BIG_ENDIAN
4772 		(2 << 0) |
4773 #endif
4774 		lower_32_bits(ib->gpu_addr));
4775 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4776 	amdgpu_ring_write(ring, control);
4777 }
4778 
4779 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4780 					  struct amdgpu_job *job,
4781 					  struct amdgpu_ib *ib,
4782 					  uint32_t flags)
4783 {
4784 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4785 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4786 
4787 	/* Currently, there is a high likelihood of getting a wave ID mismatch
4788 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4789 	 * different wave IDs than the GDS expects. This situation happens
4790 	 * randomly when at least 5 compute pipes use GDS ordered append.
4791 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4792 	 * Those are probably bugs somewhere else in the kernel driver.
4793 	 *
4794 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4795 	 * GDS to 0 for this ring (me/pipe).
4796 	 */
4797 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4798 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4799 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4800 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4801 	}
4802 
4803 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4804 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4805 	amdgpu_ring_write(ring,
4806 #ifdef __BIG_ENDIAN
4807 				(2 << 0) |
4808 #endif
4809 				lower_32_bits(ib->gpu_addr));
4810 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4811 	amdgpu_ring_write(ring, control);
4812 }
4813 
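/* Emit a fence: a RELEASE_MEM packet that flushes the relevant caches, writes
 * the 32- or 64-bit seq value to addr and optionally raises an interrupt,
 * depending on the fence flags.
 */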
4814 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4815 				     u64 seq, unsigned flags)
4816 {
4817 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4818 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4819 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4820 
4821 	/* RELEASE_MEM - flush caches, send int */
4822 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4823 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4824 					       EOP_TC_NC_ACTION_EN) :
4825 					      (EOP_TCL1_ACTION_EN |
4826 					       EOP_TC_ACTION_EN |
4827 					       EOP_TC_WB_ACTION_EN |
4828 					       EOP_TC_MD_ACTION_EN)) |
4829 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4830 				 EVENT_INDEX(5)));
4831 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4832 
4833 	/*
4834 	 * the address should be Qword aligned for a 64bit write, and Dword
4835 	 * aligned if only the low 32 bits of data are sent (data high is discarded)
4836 	 */
4837 	if (write64bit)
4838 		BUG_ON(addr & 0x7);
4839 	else
4840 		BUG_ON(addr & 0x3);
4841 	amdgpu_ring_write(ring, lower_32_bits(addr));
4842 	amdgpu_ring_write(ring, upper_32_bits(addr));
4843 	amdgpu_ring_write(ring, lower_32_bits(seq));
4844 	amdgpu_ring_write(ring, upper_32_bits(seq));
4845 	amdgpu_ring_write(ring, 0);
4846 }
4847 
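/* Stall the pipeline until this ring's fence memory reaches sync_seq, i.e.
 * until previously submitted work has signalled.
 */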
4848 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4849 {
4850 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4851 	uint32_t seq = ring->fence_drv.sync_seq;
4852 	uint64_t addr = ring->fence_drv.gpu_addr;
4853 
4854 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4855 			      lower_32_bits(addr), upper_32_bits(addr),
4856 			      seq, 0xffffffff, 4);
4857 }
4858 
4859 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4860 					unsigned vmid, uint64_t pd_addr)
4861 {
4862 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4863 
4864 	/* compute doesn't have PFP */
4865 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4866 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4867 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4868 		amdgpu_ring_write(ring, 0x0);
4869 	}
4870 }
4871 
4872 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4873 {
4874 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4875 }
4876 
4877 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4878 {
4879 	u64 wptr;
4880 
4881 	/* XXX check if swapping is necessary on BE */
4882 	if (ring->use_doorbell)
4883 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4884 	else
4885 		BUG();
4886 	return wptr;
4887 }
4888 
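/* Adjust SPI_WCL_PIPE_PERCENT for this ring's pipe: the full VALUE mask when
 * the pipe is acquired, the minimum (0x1) when it is released.
 */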
4889 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4890 					   bool acquire)
4891 {
4892 	struct amdgpu_device *adev = ring->adev;
4893 	int pipe_num, tmp, reg;
4894 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4895 
4896 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4897 
4898 	/* first me only has 2 entries, GFX and HP3D */
4899 	if (ring->me > 0)
4900 		pipe_num -= 2;
4901 
4902 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4903 	tmp = RREG32(reg);
4904 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4905 	WREG32(reg, tmp);
4906 }
4907 
4908 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4909 					    struct amdgpu_ring *ring,
4910 					    bool acquire)
4911 {
4912 	int i, pipe;
4913 	bool reserve;
4914 	struct amdgpu_ring *iring;
4915 
4916 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4917 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4918 	if (acquire)
4919 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4920 	else
4921 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4922 
4923 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4924 		/* Clear all reservations - everyone reacquires all resources */
4925 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4926 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4927 						       true);
4928 
4929 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4930 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4931 						       true);
4932 	} else {
4933 		/* Lower all pipes without a current reservation */
4934 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4935 			iring = &adev->gfx.gfx_ring[i];
4936 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4937 							   iring->me,
4938 							   iring->pipe,
4939 							   0);
4940 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4941 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4942 		}
4943 
4944 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4945 			iring = &adev->gfx.compute_ring[i];
4946 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4947 							   iring->me,
4948 							   iring->pipe,
4949 							   0);
4950 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4951 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4952 		}
4953 	}
4954 
4955 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4956 }
4957 
4958 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4959 				      struct amdgpu_ring *ring,
4960 				      bool acquire)
4961 {
4962 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4963 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4964 
4965 	mutex_lock(&adev->srbm_mutex);
4966 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4967 
4968 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4969 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4970 
4971 	soc15_grbm_select(adev, 0, 0, 0, 0);
4972 	mutex_unlock(&adev->srbm_mutex);
4973 }
4974 
4975 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4976 					       enum drm_sched_priority priority)
4977 {
4978 	struct amdgpu_device *adev = ring->adev;
4979 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4980 
4981 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4982 		return;
4983 
4984 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4985 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4986 }
4987 
4988 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4989 {
4990 	struct amdgpu_device *adev = ring->adev;
4991 
4992 	/* XXX check if swapping is necessary on BE */
4993 	if (ring->use_doorbell) {
4994 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4995 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4996 	} else {
4997 		BUG(); /* only DOORBELL method supported on gfx9 now */
4998 	}
4999 }
5000 
5001 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5002 					 u64 seq, unsigned int flags)
5003 {
5004 	struct amdgpu_device *adev = ring->adev;
5005 
5006 	/* we only allocate 32bit for each seq wb address */
5007 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5008 
5009 	/* write fence seq to the "addr" */
5010 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5011 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5012 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5013 	amdgpu_ring_write(ring, lower_32_bits(addr));
5014 	amdgpu_ring_write(ring, upper_32_bits(addr));
5015 	amdgpu_ring_write(ring, lower_32_bits(seq));
5016 
5017 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5018 		/* set register to trigger INT */
5019 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5020 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5021 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5022 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5023 		amdgpu_ring_write(ring, 0);
5024 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5025 	}
5026 }
5027 
5028 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5029 {
5030 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5031 	amdgpu_ring_write(ring, 0);
5032 }
5033 
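/* Write a cleared v9_ce_ib_state into the CE payload slot of the CSA via
 * WRITE_DATA. Only emitted for SR-IOV (see gfx_v9_ring_emit_cntxcntl).
 */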
5034 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5035 {
5036 	struct v9_ce_ib_state ce_payload = {0};
5037 	uint64_t csa_addr;
5038 	int cnt;
5039 
5040 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5041 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5042 
5043 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5044 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5045 				 WRITE_DATA_DST_SEL(8) |
5046 				 WR_CONFIRM) |
5047 				 WRITE_DATA_CACHE_POLICY(0));
5048 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5049 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5050 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5051 }
5052 
5053 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5054 {
5055 	struct v9_de_ib_state de_payload = {0};
5056 	uint64_t csa_addr, gds_addr;
5057 	int cnt;
5058 
5059 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5060 	gds_addr = csa_addr + 4096;
5061 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5062 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5063 
5064 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5065 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5066 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5067 				 WRITE_DATA_DST_SEL(8) |
5068 				 WR_CONFIRM) |
5069 				 WRITE_DATA_CACHE_POLICY(0));
5070 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5071 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5072 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5073 }
5074 
5075 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5076 {
5077 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5078 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5079 }
5080 
5081 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5082 {
5083 	uint32_t dw2 = 0;
5084 
5085 	if (amdgpu_sriov_vf(ring->adev))
5086 		gfx_v9_0_ring_emit_ce_meta(ring);
5087 
5088 	gfx_v9_0_ring_emit_tmz(ring, true);
5089 
5090 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5091 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5092 		/* set load_global_config & load_global_uconfig */
5093 		dw2 |= 0x8001;
5094 		/* set load_cs_sh_regs */
5095 		dw2 |= 0x01000000;
5096 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5097 		dw2 |= 0x10002;
5098 
5099 		/* set load_ce_ram if preamble presented */
5100 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5101 			dw2 |= 0x10000000;
5102 	} else {
5103 		/* still load_ce_ram if this is the first time the preamble is presented,
5104 		 * even though no context switch happens.
5105 		 */
5106 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5107 			dw2 |= 0x10000000;
5108 	}
5109 
5110 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5111 	amdgpu_ring_write(ring, dw2);
5112 	amdgpu_ring_write(ring, 0);
5113 }
5114 
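/* Emit a COND_EXEC packet whose skip count is patched later by
 * gfx_v9_0_ring_emit_patch_cond_exec(); returns the ring offset of the
 * 0x55aa55aa placeholder dword.
 */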
5115 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5116 {
5117 	unsigned ret;
5118 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5119 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5120 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5121 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5122 	ret = ring->wptr & ring->buf_mask;
5123 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5124 	return ret;
5125 }
5126 
5127 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5128 {
5129 	unsigned cur;
5130 	BUG_ON(offset > ring->buf_mask);
5131 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5132 
5133 	cur = (ring->wptr & ring->buf_mask) - 1;
5134 	if (likely(cur > offset))
5135 		ring->ring[offset] = cur - offset;
5136 	else
5137 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5138 }
5139 
5140 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5141 {
5142 	struct amdgpu_device *adev = ring->adev;
5143 
5144 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5145 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5146 				(5 << 8) |	/* dst: memory */
5147 				(1 << 20));	/* write confirm */
5148 	amdgpu_ring_write(ring, reg);
5149 	amdgpu_ring_write(ring, 0);
5150 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5151 				adev->virt.reg_val_offs * 4));
5152 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5153 				adev->virt.reg_val_offs * 4));
5154 }
5155 
5156 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5157 				    uint32_t val)
5158 {
5159 	uint32_t cmd = 0;
5160 
5161 	switch (ring->funcs->type) {
5162 	case AMDGPU_RING_TYPE_GFX:
5163 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5164 		break;
5165 	case AMDGPU_RING_TYPE_KIQ:
5166 		cmd = (1 << 16); /* no inc addr */
5167 		break;
5168 	default:
5169 		cmd = WR_CONFIRM;
5170 		break;
5171 	}
5172 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5173 	amdgpu_ring_write(ring, cmd);
5174 	amdgpu_ring_write(ring, reg);
5175 	amdgpu_ring_write(ring, 0);
5176 	amdgpu_ring_write(ring, val);
5177 }
5178 
5179 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5180 					uint32_t val, uint32_t mask)
5181 {
5182 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5183 }
5184 
5185 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5186 						  uint32_t reg0, uint32_t reg1,
5187 						  uint32_t ref, uint32_t mask)
5188 {
5189 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5190 	struct amdgpu_device *adev = ring->adev;
5191 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5192 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5193 
5194 	if (fw_version_ok)
5195 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5196 				      ref, mask, 0x20);
5197 	else
5198 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5199 							   ref, mask);
5200 }
5201 
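/* Soft ring recovery: issue an SQ_CMD scoped to the given VMID, which should
 * abort the offending waves without a full GPU reset.
 */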
5202 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5203 {
5204 	struct amdgpu_device *adev = ring->adev;
5205 	uint32_t value = 0;
5206 
5207 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5208 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5209 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5210 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5211 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5212 }
5213 
5214 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5215 						 enum amdgpu_interrupt_state state)
5216 {
5217 	switch (state) {
5218 	case AMDGPU_IRQ_STATE_DISABLE:
5219 	case AMDGPU_IRQ_STATE_ENABLE:
5220 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5221 			       TIME_STAMP_INT_ENABLE,
5222 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5223 		break;
5224 	default:
5225 		break;
5226 	}
5227 }
5228 
5229 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5230 						     int me, int pipe,
5231 						     enum amdgpu_interrupt_state state)
5232 {
5233 	u32 mec_int_cntl, mec_int_cntl_reg;
5234 
5235 	/*
5236 	 * amdgpu controls only the first MEC. That's why this function only
5237 	 * handles the setting of interrupts for this specific MEC. All other
5238 	 * pipes' interrupts are set by amdkfd.
5239 	 */
5240 
5241 	if (me == 1) {
5242 		switch (pipe) {
5243 		case 0:
5244 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5245 			break;
5246 		case 1:
5247 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5248 			break;
5249 		case 2:
5250 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5251 			break;
5252 		case 3:
5253 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5254 			break;
5255 		default:
5256 			DRM_DEBUG("invalid pipe %d\n", pipe);
5257 			return;
5258 		}
5259 	} else {
5260 		DRM_DEBUG("invalid me %d\n", me);
5261 		return;
5262 	}
5263 
5264 	switch (state) {
5265 	case AMDGPU_IRQ_STATE_DISABLE:
5266 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5267 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5268 					     TIME_STAMP_INT_ENABLE, 0);
5269 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5270 		break;
5271 	case AMDGPU_IRQ_STATE_ENABLE:
5272 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5273 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5274 					     TIME_STAMP_INT_ENABLE, 1);
5275 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5276 		break;
5277 	default:
5278 		break;
5279 	}
5280 }
5281 
5282 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5283 					     struct amdgpu_irq_src *source,
5284 					     unsigned type,
5285 					     enum amdgpu_interrupt_state state)
5286 {
5287 	switch (state) {
5288 	case AMDGPU_IRQ_STATE_DISABLE:
5289 	case AMDGPU_IRQ_STATE_ENABLE:
5290 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5291 			       PRIV_REG_INT_ENABLE,
5292 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5293 		break;
5294 	default:
5295 		break;
5296 	}
5297 
5298 	return 0;
5299 }
5300 
5301 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5302 					      struct amdgpu_irq_src *source,
5303 					      unsigned type,
5304 					      enum amdgpu_interrupt_state state)
5305 {
5306 	switch (state) {
5307 	case AMDGPU_IRQ_STATE_DISABLE:
5308 	case AMDGPU_IRQ_STATE_ENABLE:
5309 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5310 			       PRIV_INSTR_INT_ENABLE,
5311 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5312 	default:
5313 		break;
5314 	}
5315 
5316 	return 0;
5317 }
5318 
5319 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5320 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5321 			CP_ECC_ERROR_INT_ENABLE, 1)
5322 
5323 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5324 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5325 			CP_ECC_ERROR_INT_ENABLE, 0)
5326 
5327 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5328 					      struct amdgpu_irq_src *source,
5329 					      unsigned type,
5330 					      enum amdgpu_interrupt_state state)
5331 {
5332 	switch (state) {
5333 	case AMDGPU_IRQ_STATE_DISABLE:
5334 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5335 				CP_ECC_ERROR_INT_ENABLE, 0);
5336 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5337 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5338 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5339 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5340 		break;
5341 
5342 	case AMDGPU_IRQ_STATE_ENABLE:
5343 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5344 				CP_ECC_ERROR_INT_ENABLE, 1);
5345 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5346 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5347 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5348 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5349 		break;
5350 	default:
5351 		break;
5352 	}
5353 
5354 	return 0;
5355 }
5356 
5357 
5358 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5359 					    struct amdgpu_irq_src *src,
5360 					    unsigned type,
5361 					    enum amdgpu_interrupt_state state)
5362 {
5363 	switch (type) {
5364 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5365 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5366 		break;
5367 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5368 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5369 		break;
5370 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5371 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5372 		break;
5373 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5374 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5375 		break;
5376 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5377 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5378 		break;
5379 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5380 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5381 		break;
5382 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5383 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5384 		break;
5385 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5386 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5387 		break;
5388 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5389 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5390 		break;
5391 	default:
5392 		break;
5393 	}
5394 	return 0;
5395 }
5396 
5397 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5398 			    struct amdgpu_irq_src *source,
5399 			    struct amdgpu_iv_entry *entry)
5400 {
5401 	int i;
5402 	u8 me_id, pipe_id, queue_id;
5403 	struct amdgpu_ring *ring;
5404 
5405 	DRM_DEBUG("IH: CP EOP\n");
5406 	me_id = (entry->ring_id & 0x0c) >> 2;
5407 	pipe_id = (entry->ring_id & 0x03) >> 0;
5408 	queue_id = (entry->ring_id & 0x70) >> 4;
5409 
5410 	switch (me_id) {
5411 	case 0:
5412 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5413 		break;
5414 	case 1:
5415 	case 2:
5416 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5417 			ring = &adev->gfx.compute_ring[i];
5418 			/* Per-queue interrupt is supported for MEC starting from VI.
5419 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5420 			  */
5421 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5422 				amdgpu_fence_process(ring);
5423 		}
5424 		break;
5425 	}
5426 	return 0;
5427 }
5428 
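/* Report a CP fault: decode me/pipe/queue from the IV ring_id and signal a
 * scheduler fault on the matching gfx or compute ring.
 */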
5429 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5430 			   struct amdgpu_iv_entry *entry)
5431 {
5432 	u8 me_id, pipe_id, queue_id;
5433 	struct amdgpu_ring *ring;
5434 	int i;
5435 
5436 	me_id = (entry->ring_id & 0x0c) >> 2;
5437 	pipe_id = (entry->ring_id & 0x03) >> 0;
5438 	queue_id = (entry->ring_id & 0x70) >> 4;
5439 
5440 	switch (me_id) {
5441 	case 0:
5442 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5443 		break;
5444 	case 1:
5445 	case 2:
5446 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5447 			ring = &adev->gfx.compute_ring[i];
5448 			if (ring->me == me_id && ring->pipe == pipe_id &&
5449 			    ring->queue == queue_id)
5450 				drm_sched_fault(&ring->sched);
5451 		}
5452 		break;
5453 	}
5454 }
5455 
5456 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5457 				 struct amdgpu_irq_src *source,
5458 				 struct amdgpu_iv_entry *entry)
5459 {
5460 	DRM_ERROR("Illegal register access in command stream\n");
5461 	gfx_v9_0_fault(adev, entry);
5462 	return 0;
5463 }
5464 
5465 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5466 				  struct amdgpu_irq_src *source,
5467 				  struct amdgpu_iv_entry *entry)
5468 {
5469 	DRM_ERROR("Illegal instruction in command stream\n");
5470 	gfx_v9_0_fault(adev, entry);
5471 	return 0;
5472 }
5473 
5474 
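/* Per-block EDC counter registers for Vega20, each with its SEC and DED count
 * fields (entries with 0, 0 expose no DED field).
 */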
5475 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5476 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5477 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5478 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5479 	},
5480 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5481 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5482 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5483 	},
5484 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5485 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5486 	  0, 0
5487 	},
5488 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5489 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5490 	  0, 0
5491 	},
5492 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5493 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5494 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5495 	},
5496 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5497 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5498 	  0, 0
5499 	},
5500 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5501 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5502 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5503 	},
5504 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5505 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5506 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5507 	},
5508 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5509 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5510 	  0, 0
5511 	},
5512 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5513 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5514 	  0, 0
5515 	},
5516 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5517 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5518 	  0, 0
5519 	},
5520 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5521 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5522 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5523 	},
5524 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5525 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5526 	  0, 0
5527 	},
5528 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5529 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5530 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5531 	},
5532 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5533 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5534 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5535 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5536 	},
5537 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5538 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5539 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5540 	  0, 0
5541 	},
5542 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5543 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5544 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5545 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5546 	},
5547 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5548 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5549 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5550 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5551 	},
5552 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5553 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5554 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5555 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5556 	},
5557 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5558 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5559 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5560 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5561 	},
5562 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5563 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5564 	  0, 0
5565 	},
5566 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5567 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5568 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5569 	},
5570 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5571 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5572 	  0, 0
5573 	},
5574 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5575 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5576 	  0, 0
5577 	},
5578 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5579 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5580 	  0, 0
5581 	},
5582 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5583 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5584 	  0, 0
5585 	},
5586 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5587 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5588 	  0, 0
5589 	},
5590 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5591 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5592 	  0, 0
5593 	},
5594 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5595 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5596 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5597 	},
5598 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5599 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5600 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5601 	},
5602 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5603 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5604 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5605 	},
5606 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5607 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5608 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5609 	},
5610 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5611 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5612 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5613 	},
5614 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5615 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5616 	  0, 0
5617 	},
5618 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5619 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5620 	  0, 0
5621 	},
5622 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5623 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5624 	  0, 0
5625 	},
5626 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5627 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5628 	  0, 0
5629 	},
5630 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5631 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5632 	  0, 0
5633 	},
5634 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5635 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5636 	  0, 0
5637 	},
5638 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5639 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5640 	  0, 0
5641 	},
5642 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5643 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5644 	  0, 0
5645 	},
5646 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5647 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5648 	  0, 0
5649 	},
5650 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5651 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5652 	  0, 0
5653 	},
5654 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5655 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5656 	  0, 0
5657 	},
5658 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5659 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5660 	  0, 0
5661 	},
5662 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5663 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5664 	  0, 0
5665 	},
5666 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5667 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5668 	  0, 0
5669 	},
5670 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5671 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5672 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5673 	},
5674 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5675 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5676 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5677 	},
5678 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5679 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5680 	  0, 0
5681 	},
5682 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5683 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5684 	  0, 0
5685 	},
5686 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5687 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5688 	  0, 0
5689 	},
5690 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5691 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5692 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5693 	},
5694 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5695 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5696 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5697 	},
5698 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5699 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5700 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5701 	},
5702 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5703 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5704 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5705 	},
5706 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5707 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5708 	  0, 0
5709 	},
5710 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5711 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5712 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5713 	},
5714 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5715 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5716 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5717 	},
5718 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5719 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5720 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5721 	},
5722 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5723 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5724 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5725 	},
5726 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5727 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5728 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5729 	},
5730 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5731 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5732 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5733 	},
5734 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5735 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5736 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5737 	},
5738 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5739 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5740 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5741 	},
5742 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5743 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5744 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5745 	},
5746 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5747 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5748 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5749 	},
5750 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5751 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5752 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5753 	},
5754 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5755 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5756 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5757 	},
5758 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5759 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5760 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5761 	},
5762 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5763 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5764 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5765 	},
5766 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5767 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5768 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5769 	},
5770 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5771 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5772 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5773 	},
5774 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5775 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5776 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5777 	},
5778 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5779 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5780 	  0, 0
5781 	},
5782 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5783 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5784 	  0, 0
5785 	},
5786 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5787 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5788 	  0, 0
5789 	},
5790 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5791 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5792 	  0, 0
5793 	},
5794 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5795 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5796 	  0, 0
5797 	},
5798 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5799 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5800 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5801 	},
5802 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5803 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5804 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5805 	},
5806 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5807 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5808 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5809 	},
5810 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5811 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5812 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5813 	},
5814 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5815 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5816 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5817 	},
5818 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5819 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5820 	  0, 0
5821 	},
5822 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5823 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5824 	  0, 0
5825 	},
5826 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5827 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5828 	  0, 0
5829 	},
5830 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5831 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5832 	  0, 0
5833 	},
5834 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5835 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5836 	  0, 0
5837 	},
5838 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5839 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5840 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5841 	},
5842 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5843 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5844 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5845 	},
5846 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5847 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5848 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5849 	},
5850 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5851 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5852 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5853 	},
5854 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5855 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5856 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5857 	},
5858 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5859 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5860 	  0, 0
5861 	},
5862 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5863 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5864 	  0, 0
5865 	},
5866 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5867 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5868 	  0, 0
5869 	},
5870 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5871 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5872 	  0, 0
5873 	},
5874 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5875 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5876 	  0, 0
5877 	},
5878 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5879 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5880 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5881 	},
5882 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5883 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5884 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5885 	},
5886 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5887 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5888 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5889 	},
5890 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5891 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5892 	  0, 0
5893 	},
5894 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5895 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5896 	  0, 0
5897 	},
5898 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5899 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5900 	  0, 0
5901 	},
5902 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5903 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5904 	  0, 0
5905 	},
5906 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5907 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5908 	  0, 0
5909 	},
5910 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5911 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5912 	  0, 0
5913 	}
5914 };
5915 
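/*
 * Inject a RAS error through the PSP RAS TA. Only supported on Vega20;
 * the requested GFX sub-block and error type are validated against the
 * ras_gfx_subblocks table before the trigger is handed to
 * psp_ras_trigger_error() under grbm_idx_mutex.
 */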
5916 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5917 				     void *inject_if)
5918 {
5919 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5920 	int ret;
5921 	struct ta_ras_trigger_error_input block_info = { 0 };
5922 
5923 	if (adev->asic_type != CHIP_VEGA20)
5924 		return -EINVAL;
5925 
5926 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5927 		return -EINVAL;
5928 
5929 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5930 		return -EPERM;
5931 
5932 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5933 	      info->head.type)) {
5934 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5935 			ras_gfx_subblocks[info->head.sub_block_index].name,
5936 			info->head.type);
5937 		return -EPERM;
5938 	}
5939 
5940 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5941 	      info->head.type)) {
5942 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5943 			ras_gfx_subblocks[info->head.sub_block_index].name,
5944 			info->head.type);
5945 		return -EPERM;
5946 	}
5947 
5948 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5949 	block_info.sub_block_index =
5950 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5951 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5952 	block_info.address = info->address;
5953 	block_info.value = info->value;
5954 
5955 	mutex_lock(&adev->grbm_idx_mutex);
5956 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
5957 	mutex_unlock(&adev->grbm_idx_mutex);
5958 
5959 	return ret;
5960 }
5961 
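/*
 * Instance names for the UTC memories queried by
 * gfx_v9_0_query_utc_edc_status(); the value written to the matching
 * *_ECC/_EDC_INDEX register selects the entry being read.
 */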
5962 static const char *vml2_mems[] = {
5963 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
5964 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
5965 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
5966 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
5967 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
5968 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
5969 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
5970 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
5971 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
5972 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
5973 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
5974 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
5975 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
5976 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
5977 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
5978 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
5979 };
5980 
5981 static const char *vml2_walker_mems[] = {
5982 	"UTC_VML2_CACHE_PDE0_MEM0",
5983 	"UTC_VML2_CACHE_PDE0_MEM1",
5984 	"UTC_VML2_CACHE_PDE1_MEM0",
5985 	"UTC_VML2_CACHE_PDE1_MEM1",
5986 	"UTC_VML2_CACHE_PDE2_MEM0",
5987 	"UTC_VML2_CACHE_PDE2_MEM1",
5988 	"UTC_VML2_RDIF_LOG_FIFO",
5989 };
5990 
5991 static const char *atc_l2_cache_2m_mems[] = {
5992 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
5993 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
5994 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
5995 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
5996 };
5997 
5998 static const char *atc_l2_cache_4k_mems[] = {
5999 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6000 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6001 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6002 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6003 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6004 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6005 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6006 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6007 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6008 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6009 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6010 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6011 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6012 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6013 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6014 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6015 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6016 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6017 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6018 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6019 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6020 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6021 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6022 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6023 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6024 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6025 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6026 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6027 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6028 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6029 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6030 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6031 };
6032 
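/*
 * Read the EDC/ECC counters of the UTC memories (VML2 bank caches,
 * VML2 walker memories, ATC L2 2M and 4K caches). Correctable (SEC)
 * errors are accumulated into err_data->ce_count and uncorrectable
 * (DED) errors into err_data->ue_count.
 */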
6033 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6034 					 struct ras_err_data *err_data)
6035 {
6036 	uint32_t i, data;
6037 	uint32_t sec_count, ded_count;
6038 
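	/* write 0 to the ECC/EDC counters so stale values are not counted */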
6039 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6040 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6041 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6042 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6043 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6044 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6045 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6046 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6047 
6048 	for (i = 0; i < 16; i++) {
6049 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6050 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6051 
6052 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6053 		if (sec_count) {
6054 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6055 				 vml2_mems[i], sec_count);
6056 			err_data->ce_count += sec_count;
6057 		}
6058 
6059 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6060 		if (ded_count) {
6061 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6062 				 vml2_mems[i], ded_count);
6063 			err_data->ue_count += ded_count;
6064 		}
6065 	}
6066 
6067 	for (i = 0; i < 7; i++) {
6068 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6069 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6070 
6071 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6072 						SEC_COUNT);
6073 		if (sec_count) {
6074 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6075 				 vml2_walker_mems[i], sec_count);
6076 			err_data->ce_count += sec_count;
6077 		}
6078 
6079 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6080 						DED_COUNT);
6081 		if (ded_count) {
6082 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6083 				 vml2_walker_mems[i], ded_count);
6084 			err_data->ue_count += ded_count;
6085 		}
6086 	}
6087 
6088 	for (i = 0; i < 4; i++) {
6089 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6090 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6091 
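		/* SEC count lives in bits 14:13 of ATC_L2_CACHE_2M_EDC_CNT */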
6092 		sec_count = (data & 0x00006000L) >> 0xd;
6093 		if (sec_count) {
6094 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6095 				 atc_l2_cache_2m_mems[i], sec_count);
6096 			err_data->ce_count += sec_count;
6097 		}
6098 	}
6099 
6100 	for (i = 0; i < 32; i++) {
6101 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6102 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6103 
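		/* SEC count: bits 14:13, DED count: bits 16:15 of ATC_L2_CACHE_4K_EDC_CNT */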
6104 		sec_count = (data & 0x00006000L) >> 0xd;
6105 		if (sec_count) {
6106 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6107 				 atc_l2_cache_4k_mems[i], sec_count);
6108 			err_data->ce_count += sec_count;
6109 		}
6110 
6111 		ded_count = (data & 0x00018000L) >> 0xf;
6112 		if (ded_count) {
6113 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6114 				 atc_l2_cache_4k_mems[i], ded_count);
6115 			err_data->ue_count += ded_count;
6116 		}
6117 	}
6118 
6119 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6120 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6121 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6122 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6123 
6124 	return 0;
6125 }
6126 
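/*
 * Decode one raw SEC/DED counter register value: find every entry of
 * gc_ras_fields_vg20 that describes this register, extract its SEC and
 * DED bitfields, log non-zero counts and add them to the running totals.
 */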
6127 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6128 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6129 	uint32_t *sec_count, uint32_t *ded_count)
6130 {
6131 	uint32_t i;
6132 	uint32_t sec_cnt, ded_cnt;
6133 
6134 	for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
6135 		if (gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6136 			gc_ras_fields_vg20[i].seg != reg->seg ||
6137 			gc_ras_fields_vg20[i].inst != reg->inst)
6138 			continue;
6139 
6140 		sec_cnt = (value &
6141 				gc_ras_fields_vg20[i].sec_count_mask) >>
6142 				gc_ras_fields_vg20[i].sec_count_shift;
6143 		if (sec_cnt) {
6144 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6145 				gc_ras_fields_vg20[i].name,
6146 				se_id, inst_id,
6147 				sec_cnt);
6148 			*sec_count += sec_cnt;
6149 		}
6150 
6151 		ded_cnt = (value &
6152 				gc_ras_fields_vg20[i].ded_count_mask) >>
6153 				gc_ras_fields_vg20[i].ded_count_shift;
6154 		if (ded_cnt) {
6155 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6156 				gc_ras_fields_vg20[i].name,
6157 				se_id, inst_id,
6158 				ded_cnt);
6159 			*ded_count += ded_cnt;
6160 		}
6161 	}
6162 
6163 	return 0;
6164 }
6165 
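/*
 * Count GFX RAS errors on Vega20: walk every SEC/DED counter register
 * for each shader engine and instance under grbm_idx_mutex, decode the
 * raw values with __get_ras_error_count(), then add the UTC EDC status
 * on top.
 */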
6166 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6167 					  void *ras_error_status)
6168 {
6169 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6170 	uint32_t sec_count = 0, ded_count = 0;
6171 	uint32_t i, j, k;
6172 	uint32_t reg_value;
6173 
6174 	if (adev->asic_type != CHIP_VEGA20)
6175 		return -EINVAL;
6176 
6177 	err_data->ue_count = 0;
6178 	err_data->ce_count = 0;
6179 
6180 	mutex_lock(&adev->grbm_idx_mutex);
6181 
6182 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6183 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6184 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6185 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6186 				reg_value =
6187 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6188 				if (reg_value)
6189 					__get_ras_error_count(&sec_ded_counter_registers[i],
6190 							j, k, reg_value,
6191 							&sec_count, &ded_count);
6192 			}
6193 		}
6194 	}
6195 
6196 	err_data->ce_count += sec_count;
6197 	err_data->ue_count += ded_count;
6198 
6199 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6200 	mutex_unlock(&adev->grbm_idx_mutex);
6201 
6202 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6203 
6204 	return 0;
6205 }
6206 
6207 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6208 	.name = "gfx_v9_0",
6209 	.early_init = gfx_v9_0_early_init,
6210 	.late_init = gfx_v9_0_late_init,
6211 	.sw_init = gfx_v9_0_sw_init,
6212 	.sw_fini = gfx_v9_0_sw_fini,
6213 	.hw_init = gfx_v9_0_hw_init,
6214 	.hw_fini = gfx_v9_0_hw_fini,
6215 	.suspend = gfx_v9_0_suspend,
6216 	.resume = gfx_v9_0_resume,
6217 	.is_idle = gfx_v9_0_is_idle,
6218 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6219 	.soft_reset = gfx_v9_0_soft_reset,
6220 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6221 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6222 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6223 };
6224 
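/*
 * GFX ring callbacks. emit_frame_size is the worst-case number of ring
 * dwords reserved per frame; the per-packet breakdown is listed below.
 */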
6225 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6226 	.type = AMDGPU_RING_TYPE_GFX,
6227 	.align_mask = 0xff,
6228 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6229 	.support_64bit_ptrs = true,
6230 	.vmhub = AMDGPU_GFXHUB_0,
6231 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6232 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6233 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6234 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6235 		5 +  /* COND_EXEC */
6236 		7 +  /* PIPELINE_SYNC */
6237 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6238 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6239 		2 + /* VM_FLUSH */
6240 		8 +  /* FENCE for VM_FLUSH */
6241 		20 + /* GDS switch */
6242 		4 + /* double SWITCH_BUFFER,
6243 		       the first COND_EXEC jumps to the place just
6244 		       prior to this double SWITCH_BUFFER */
6245 		5 + /* COND_EXEC */
6246 		7 + /* HDP_flush */
6247 		4 + /* VGT_flush */
6248 		14 + /* CE_META */
6249 		31 + /* DE_META */
6250 		3 + /* CNTX_CTRL */
6251 		5 + /* HDP_INVL */
6252 		8 + 8 + /* FENCE x2 */
6253 		2, /* SWITCH_BUFFER */
6254 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6255 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6256 	.emit_fence = gfx_v9_0_ring_emit_fence,
6257 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6258 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6259 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6260 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6261 	.test_ring = gfx_v9_0_ring_test_ring,
6262 	.test_ib = gfx_v9_0_ring_test_ib,
6263 	.insert_nop = amdgpu_ring_insert_nop,
6264 	.pad_ib = amdgpu_ring_generic_pad_ib,
6265 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6266 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6267 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6268 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6269 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6270 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6271 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6272 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6273 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6274 };
6275 
6276 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6277 	.type = AMDGPU_RING_TYPE_COMPUTE,
6278 	.align_mask = 0xff,
6279 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6280 	.support_64bit_ptrs = true,
6281 	.vmhub = AMDGPU_GFXHUB_0,
6282 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6283 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6284 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6285 	.emit_frame_size =
6286 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6287 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6288 		5 + /* hdp invalidate */
6289 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6290 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6291 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6292 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6293 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6294 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6295 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6296 	.emit_fence = gfx_v9_0_ring_emit_fence,
6297 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6298 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6299 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6300 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6301 	.test_ring = gfx_v9_0_ring_test_ring,
6302 	.test_ib = gfx_v9_0_ring_test_ib,
6303 	.insert_nop = amdgpu_ring_insert_nop,
6304 	.pad_ib = amdgpu_ring_generic_pad_ib,
6305 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6306 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6307 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6308 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6309 };
6310 
6311 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6312 	.type = AMDGPU_RING_TYPE_KIQ,
6313 	.align_mask = 0xff,
6314 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6315 	.support_64bit_ptrs = true,
6316 	.vmhub = AMDGPU_GFXHUB_0,
6317 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6318 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6319 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6320 	.emit_frame_size =
6321 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6322 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6323 		5 + /* hdp invalidate */
6324 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6325 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6326 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6327 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6328 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6329 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6330 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6331 	.test_ring = gfx_v9_0_ring_test_ring,
6332 	.insert_nop = amdgpu_ring_insert_nop,
6333 	.pad_ib = amdgpu_ring_generic_pad_ib,
6334 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6335 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6336 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6337 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6338 };
6339 
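/* Attach the ring callback tables to the KIQ, GFX and compute rings. */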
6340 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6341 {
6342 	int i;
6343 
6344 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6345 
6346 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6347 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6348 
6349 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6350 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6351 }
6352 
6353 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6354 	.set = gfx_v9_0_set_eop_interrupt_state,
6355 	.process = gfx_v9_0_eop_irq,
6356 };
6357 
6358 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6359 	.set = gfx_v9_0_set_priv_reg_fault_state,
6360 	.process = gfx_v9_0_priv_reg_irq,
6361 };
6362 
6363 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6364 	.set = gfx_v9_0_set_priv_inst_fault_state,
6365 	.process = gfx_v9_0_priv_inst_irq,
6366 };
6367 
6368 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6369 	.set = gfx_v9_0_set_cp_ecc_error_state,
6370 	.process = amdgpu_gfx_cp_ecc_error_irq,
6371 };
6372 
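/*
 * Register the interrupt sources handled by the GFX block: end-of-pipe,
 * privileged register and privileged instruction faults, and CP ECC
 * errors.
 */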
6374 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6375 {
6376 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6377 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6378 
6379 	adev->gfx.priv_reg_irq.num_types = 1;
6380 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6381 
6382 	adev->gfx.priv_inst_irq.num_types = 1;
6383 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6384 
6385 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6386 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6387 }
6388 
6389 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6390 {
6391 	switch (adev->asic_type) {
6392 	case CHIP_VEGA10:
6393 	case CHIP_VEGA12:
6394 	case CHIP_VEGA20:
6395 	case CHIP_RAVEN:
6396 	case CHIP_ARCTURUS:
6397 	case CHIP_RENOIR:
6398 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6399 		break;
6400 	default:
6401 		break;
6402 	}
6403 }
6404 
6405 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6406 {
6407 	/* init asic gds info */
6408 	switch (adev->asic_type) {
6409 	case CHIP_VEGA10:
6410 	case CHIP_VEGA12:
6411 	case CHIP_VEGA20:
6412 		adev->gds.gds_size = 0x10000;
6413 		break;
6414 	case CHIP_RAVEN:
6415 	case CHIP_ARCTURUS:
6416 		adev->gds.gds_size = 0x1000;
6417 		break;
6418 	default:
6419 		adev->gds.gds_size = 0x10000;
6420 		break;
6421 	}
6422 
6423 	switch (adev->asic_type) {
6424 	case CHIP_VEGA10:
6425 	case CHIP_VEGA20:
6426 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6427 		break;
6428 	case CHIP_VEGA12:
6429 		adev->gds.gds_compute_max_wave_id = 0x27f;
6430 		break;
6431 	case CHIP_RAVEN:
6432 		if (adev->rev_id >= 0x8)
6433 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6434 		else
6435 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6436 		break;
6437 	case CHIP_ARCTURUS:
6438 		adev->gds.gds_compute_max_wave_id = 0xfff;
6439 		break;
6440 	default:
6441 		/* this really depends on the chip */
6442 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6443 		break;
6444 	}
6445 
6446 	adev->gds.gws_size = 64;
6447 	adev->gds.oa_size = 16;
6448 }
6449 
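/*
 * Program the user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */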
6450 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6451 						 u32 bitmap)
6452 {
6453 	u32 data;
6454 
6455 	if (!bitmap)
6456 		return;
6457 
6458 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6459 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6460 
6461 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6462 }
6463 
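/*
 * Combine the inactive-CU masks from CC_GC_SHADER_ARRAY_CONFIG and
 * GC_USER_SHADER_ARRAY_CONFIG and return the resulting active-CU bitmap
 * for the selected SE/SH.
 */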
6464 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6465 {
6466 	u32 data, mask;
6467 
6468 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6469 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6470 
6471 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6472 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6473 
6474 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6475 
6476 	return (~data) & mask;
6477 }
6478 
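/*
 * Walk every shader engine and shader array, apply the per-SH disable
 * masks, record the active-CU bitmaps and the always-on CU mask, and
 * fill in cu_info (total CU count, AO mask, SIMDs per CU).
 */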
6479 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6480 				 struct amdgpu_cu_info *cu_info)
6481 {
6482 	int i, j, k, counter, active_cu_number = 0;
6483 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6484 	unsigned disable_masks[4 * 4];
6485 
6486 	if (!adev || !cu_info)
6487 		return -EINVAL;
6488 
6489 	/*
6490 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6491 	 */
6492 	if (adev->gfx.config.max_shader_engines *
6493 		adev->gfx.config.max_sh_per_se > 16)
6494 		return -EINVAL;
6495 
6496 	amdgpu_gfx_parse_disable_cu(disable_masks,
6497 				    adev->gfx.config.max_shader_engines,
6498 				    adev->gfx.config.max_sh_per_se);
6499 
6500 	mutex_lock(&adev->grbm_idx_mutex);
6501 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6502 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6503 			mask = 1;
6504 			ao_bitmap = 0;
6505 			counter = 0;
6506 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6507 			gfx_v9_0_set_user_cu_inactive_bitmap(
6508 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6509 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6510 
6511 			/*
6512 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6513 			 * is a 4x4 array, which suits Vega ASICs with their
6514 			 * 4*2 SE/SH layout.
6515 			 * Arcturus, however, uses an 8*1 SE/SH layout.
6516 			 * To minimize the impact, map it onto the existing
6517 			 * bitmap array as below:
6518 			 *    SE4,SH0 --> bitmap[0][1]
6519 			 *    SE5,SH0 --> bitmap[1][1]
6520 			 *    SE6,SH0 --> bitmap[2][1]
6521 			 *    SE7,SH0 --> bitmap[3][1]
6522 			 */
6523 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6524 
6525 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6526 				if (bitmap & mask) {
6527 					if (counter < adev->gfx.config.max_cu_per_sh)
6528 						ao_bitmap |= mask;
6529 					counter++;
6530 				}
6531 				mask <<= 1;
6532 			}
6533 			active_cu_number += counter;
6534 			if (i < 2 && j < 2)
6535 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6536 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6537 		}
6538 	}
6539 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6540 	mutex_unlock(&adev->grbm_idx_mutex);
6541 
6542 	cu_info->number = active_cu_number;
6543 	cu_info->ao_cu_mask = ao_cu_mask;
6544 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6545 
6546 	return 0;
6547 }
6548 
6549 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6550 {
6551 	.type = AMD_IP_BLOCK_TYPE_GFX,
6552 	.major = 9,
6553 	.minor = 0,
6554 	.rev = 0,
6555 	.funcs = &gfx_v9_0_ip_funcs,
6556 };
6557