xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #include "gfx_v9_4.h"
52 
53 #define GFX9_NUM_GFX_RINGS     1
54 #define GFX9_MEC_HPD_SIZE 4096
55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
56 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
57 
58 #define mmPWR_MISC_CNTL_STATUS					0x0183
59 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
62 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
63 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
64 
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
122 
123 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
133 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
135 
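/*
 * GFX RAS sub-block indices as defined by the RAS TA interface.  The
 * ras_gfx_subblocks[] table further below maps the driver-side
 * AMDGPU_RAS_BLOCK__GFX_* sub-block IDs onto these values.
 */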
136 enum ta_ras_gfx_subblock {
137 	/*CPC*/
138 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
139 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
140 	TA_RAS_BLOCK__GFX_CPC_UCODE,
141 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
143 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
144 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
145 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
146 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
147 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
148 	/* CPF*/
149 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
150 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
151 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
152 	TA_RAS_BLOCK__GFX_CPF_TAG,
153 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
154 	/* CPG*/
155 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
157 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
158 	TA_RAS_BLOCK__GFX_CPG_TAG,
159 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
160 	/* GDS*/
161 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
162 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
163 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
166 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
167 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
168 	/* SPI*/
169 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
170 	/* SQ*/
171 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
172 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
173 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
174 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
175 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
176 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
177 	/* SQC (3 ranges)*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	/* SQC range 0*/
180 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
181 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
182 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
188 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
190 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
191 	/* SQC range 1*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
197 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
204 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
205 	/* SQC range 2*/
206 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
208 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
218 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
220 	/* TA*/
221 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
222 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
223 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
225 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
226 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
227 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
228 	/* TCA*/
229 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
231 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
232 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
233 	/* TCC (5 sub-ranges)*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	/* TCC range 0*/
236 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
241 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
242 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
243 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
244 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
245 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
246 	/* TCC range 1*/
247 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
248 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
249 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
251 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
252 	/* TCC range 2*/
253 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
254 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
255 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
256 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
257 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
258 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
259 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
261 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
263 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
264 	/* TCC range 3*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
266 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
267 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
269 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
270 	/* TCC range 4*/
271 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
273 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
274 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
276 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
277 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
278 	/* TCI*/
279 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
280 	/* TCP*/
281 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
282 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
283 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
284 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
285 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
286 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
287 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
288 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
289 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
290 	/* TD*/
291 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
292 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
293 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
294 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
295 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
296 	/* EA (3 sub-ranges)*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	/* EA range 0*/
299 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
300 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
301 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
302 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
303 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
304 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
306 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
307 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
308 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
309 	/* EA range 1*/
310 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
311 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
313 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
315 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
316 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
318 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
319 	/* EA range 2*/
320 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
323 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
324 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
326 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
327 	/* UTC VM L2 bank*/
328 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
329 	/* UTC VM walker*/
330 	TA_RAS_BLOCK__UTC_VML2_WALKER,
331 	/* UTC ATC L2 2MB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
333 	/* UTC ATC L2 4KB cache*/
334 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
335 	TA_RAS_BLOCK__GFX_MAX
336 };
337 
338 struct ras_gfx_subblock {
339 	unsigned char *name;
340 	int ta_subblock;
341 	int hw_supported_error_type;
342 	int sw_supported_error_type;
343 };
344 
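/*
 * The AMDGPU_RAS_SUB_BLOCK() initializer below ties a driver sub-block ID to
 * its TA sub-block index and packs the supported error-type flags: a..d
 * become bits 0..3 of hw_supported_error_type, while g, e, h and f become
 * bits 0..3 of sw_supported_error_type.  The individual flags presumably
 * correspond to the correctable/uncorrectable error classes checked by the
 * RAS error-inject and query paths.
 */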
345 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
346 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
347 		#subblock,                                                     \
348 		TA_RAS_BLOCK__##subblock,                                      \
349 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
350 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
351 	}
352 
353 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
354 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
373 			     0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
382 			     0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
384 			     0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
386 			     0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
388 			     0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
390 			     0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
392 			     0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
394 			     1),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
396 			     0, 0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
406 			     0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
408 			     0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
414 			     0, 0, 0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
426 			     0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
442 			     1),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
446 			     0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
459 			     0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
462 			     0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
464 			     0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
466 			     0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
501 };
502 
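/*
 * "Golden" register settings.  Each SOC15_REG_GOLDEN_VALUE() entry names a
 * GC register, a mask of the bits to update and the value to program into
 * them; gfx_v9_0_init_golden_registers() below applies a generation-wide
 * list plus a chip-specific list via soc15_program_register_sequence().
 */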
503 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
504 {
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
525 };
526 
527 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
528 {
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
547 };
548 
549 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
550 {
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
562 };
563 
564 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
565 {
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
590 };
591 
592 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
593 {
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
601 };
602 
603 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
604 {
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 };
625 
626 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
627 {
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 };
668 
669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
670 {
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
684 };
685 
686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
687 {
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
698 };
699 
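/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL address/data register pairs,
 * expressed relative to entry 0 so callers can program them by index.
 */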
700 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
701 {
702 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
709 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 };
711 
712 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
713 {
714 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
721 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 };
723 
724 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
725 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
726 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
727 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
728 
729 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
731 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
732 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
733 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
734                                  struct amdgpu_cu_info *cu_info);
735 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
736 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
737 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
738 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
739 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
740 					  void *ras_error_status);
741 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
742 				     void *inject_if);
743 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
744 
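/*
 * Emit a PACKET3_SET_RESOURCES packet on the KIQ ring, handing the KIQ the
 * bitmap of compute queues it may schedule.  The GWS, OAC and GDS resources
 * are all left at zero here.
 */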
745 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
746 				uint64_t queue_mask)
747 {
748 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
749 	amdgpu_ring_write(kiq_ring,
750 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
751 		/* vmid_mask:0 queue_type:0 (KIQ) */
752 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
753 	amdgpu_ring_write(kiq_ring,
754 			lower_32_bits(queue_mask));	/* queue mask lo */
755 	amdgpu_ring_write(kiq_ring,
756 			upper_32_bits(queue_mask));	/* queue mask hi */
757 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
758 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
759 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
760 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
761 }
762 
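/*
 * Emit a PACKET3_MAP_QUEUES packet on the KIQ ring so the CP starts
 * scheduling @ring: the packet carries the queue/pipe/ME selection, the
 * doorbell offset, the MQD address and the write-pointer polling address.
 */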
763 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
764 				 struct amdgpu_ring *ring)
765 {
766 	struct amdgpu_device *adev = kiq_ring->adev;
767 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
768 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
769 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
770 
771 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
772 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
773 	amdgpu_ring_write(kiq_ring,
774 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
775 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
776 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
777 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
778 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
779 			 /*queue_type: normal compute queue */
780 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
781 			 /* alloc format: all_on_one_pipe */
782 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
783 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
784 			 /* num_queues: must be 1 */
785 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
786 	amdgpu_ring_write(kiq_ring,
787 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
788 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
789 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
790 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
791 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
792 }
793 
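/*
 * Emit a PACKET3_UNMAP_QUEUES packet for @ring.  For
 * PREEMPT_QUEUES_NO_UNMAP the trailing dwords carry a fence address and
 * sequence number used to signal completion of the preemption; otherwise
 * they are left at zero.
 */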
794 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
795 				   struct amdgpu_ring *ring,
796 				   enum amdgpu_unmap_queues_action action,
797 				   u64 gpu_addr, u64 seq)
798 {
799 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
800 
801 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
802 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
803 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
804 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
805 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
806 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
807 	amdgpu_ring_write(kiq_ring,
808 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
809 
810 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
811 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
812 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
813 		amdgpu_ring_write(kiq_ring, seq);
814 	} else {
815 		amdgpu_ring_write(kiq_ring, 0);
816 		amdgpu_ring_write(kiq_ring, 0);
817 		amdgpu_ring_write(kiq_ring, 0);
818 	}
819 }
820 
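/*
 * Emit a PACKET3_QUERY_STATUS packet asking the CP to report on the queue
 * identified by its doorbell offset; the CP acknowledges by writing @seq to
 * the buffer at @addr.
 */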
821 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
822 				   struct amdgpu_ring *ring,
823 				   u64 addr,
824 				   u64 seq)
825 {
826 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
827 
828 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
829 	amdgpu_ring_write(kiq_ring,
830 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
831 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
832 			  PACKET3_QUERY_STATUS_COMMAND(2));
833 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
834 	amdgpu_ring_write(kiq_ring,
835 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
836 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
837 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
838 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
839 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
840 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
841 }
842 
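/*
 * Emit a PACKET3_INVALIDATE_TLBS packet so the CP flushes the VM TLB
 * entries belonging to @pasid, on one or all VM hubs depending on @all_hub.
 */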
843 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
844 				uint16_t pasid, uint32_t flush_type,
845 				bool all_hub)
846 {
847 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
848 	amdgpu_ring_write(kiq_ring,
849 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
850 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
851 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
852 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
853 }
854 
855 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
856 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
857 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
858 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
859 	.kiq_query_status = gfx_v9_0_kiq_query_status,
860 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
861 	.set_resources_size = 8,
862 	.map_queues_size = 7,
863 	.unmap_queues_size = 6,
864 	.query_status_size = 7,
865 	.invalidate_tlbs_size = 2,
866 };
867 
868 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
869 {
870 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
871 }
872 
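/*
 * Apply the golden register lists for the current ASIC: the generation-wide
 * list first, then the chip-specific one.  Renoir returns early and
 * Arcturus is excluded from the trailing common gc_9_x settings.
 */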
873 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
874 {
875 	switch (adev->asic_type) {
876 	case CHIP_VEGA10:
877 		soc15_program_register_sequence(adev,
878 						golden_settings_gc_9_0,
879 						ARRAY_SIZE(golden_settings_gc_9_0));
880 		soc15_program_register_sequence(adev,
881 						golden_settings_gc_9_0_vg10,
882 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
883 		break;
884 	case CHIP_VEGA12:
885 		soc15_program_register_sequence(adev,
886 						golden_settings_gc_9_2_1,
887 						ARRAY_SIZE(golden_settings_gc_9_2_1));
888 		soc15_program_register_sequence(adev,
889 						golden_settings_gc_9_2_1_vg12,
890 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
891 		break;
892 	case CHIP_VEGA20:
893 		soc15_program_register_sequence(adev,
894 						golden_settings_gc_9_0,
895 						ARRAY_SIZE(golden_settings_gc_9_0));
896 		soc15_program_register_sequence(adev,
897 						golden_settings_gc_9_0_vg20,
898 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
899 		break;
900 	case CHIP_ARCTURUS:
901 		soc15_program_register_sequence(adev,
902 						golden_settings_gc_9_4_1_arct,
903 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
904 		break;
905 	case CHIP_RAVEN:
906 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
907 						ARRAY_SIZE(golden_settings_gc_9_1));
908 		if (adev->rev_id >= 8)
909 			soc15_program_register_sequence(adev,
910 							golden_settings_gc_9_1_rv2,
911 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
912 		else
913 			soc15_program_register_sequence(adev,
914 							golden_settings_gc_9_1_rv1,
915 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
916 		break;
917 	case CHIP_RENOIR:
918 		soc15_program_register_sequence(adev,
919 						golden_settings_gc_9_1_rn,
920 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
921 		return; /* Renoir doesn't need the common golden settings */
922 	default:
923 		break;
924 	}
925 
926 	if (adev->asic_type != CHIP_ARCTURUS)
927 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
928 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
929 }
930 
931 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
932 {
933 	adev->gfx.scratch.num_reg = 8;
934 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
935 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
936 }
937 
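/*
 * Emit a WRITE_DATA packet that writes @val to the register at offset @reg
 * (DST_SEL 0 selects a memory-mapped register), optionally waiting for
 * write confirmation.
 */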
938 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
939 				       bool wc, uint32_t reg, uint32_t val)
940 {
941 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
942 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
943 				WRITE_DATA_DST_SEL(0) |
944 				(wc ? WR_CONFIRM : 0));
945 	amdgpu_ring_write(ring, reg);
946 	amdgpu_ring_write(ring, 0);
947 	amdgpu_ring_write(ring, val);
948 }
949 
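/*
 * Emit a WAIT_REG_MEM packet that polls a register (@mem_space == 0) or a
 * memory location (@mem_space == 1) until (value & @mask) equals @ref,
 * with @inv as the poll interval.
 */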
950 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
951 				  int mem_space, int opt, uint32_t addr0,
952 				  uint32_t addr1, uint32_t ref, uint32_t mask,
953 				  uint32_t inv)
954 {
955 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
956 	amdgpu_ring_write(ring,
957 				 /* memory (1) or register (0) */
958 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
959 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
960 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
961 				 WAIT_REG_MEM_ENGINE(eng_sel)));
962 
963 	if (mem_space)
964 		BUG_ON(addr0 & 0x3); /* Dword align */
965 	amdgpu_ring_write(ring, addr0);
966 	amdgpu_ring_write(ring, addr1);
967 	amdgpu_ring_write(ring, ref);
968 	amdgpu_ring_write(ring, mask);
969 	amdgpu_ring_write(ring, inv); /* poll interval */
970 }
971 
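/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD over
 * MMIO, write 0xDEADBEEF to it through the ring with SET_UCONFIG_REG, and
 * poll until the new value shows up or the usec timeout expires.
 */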
972 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
973 {
974 	struct amdgpu_device *adev = ring->adev;
975 	uint32_t scratch;
976 	uint32_t tmp = 0;
977 	unsigned i;
978 	int r;
979 
980 	r = amdgpu_gfx_scratch_get(adev, &scratch);
981 	if (r)
982 		return r;
983 
984 	WREG32(scratch, 0xCAFEDEAD);
985 	r = amdgpu_ring_alloc(ring, 3);
986 	if (r)
987 		goto error_free_scratch;
988 
989 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
990 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
991 	amdgpu_ring_write(ring, 0xDEADBEEF);
992 	amdgpu_ring_commit(ring);
993 
994 	for (i = 0; i < adev->usec_timeout; i++) {
995 		tmp = RREG32(scratch);
996 		if (tmp == 0xDEADBEEF)
997 			break;
998 		udelay(1);
999 	}
1000 
1001 	if (i >= adev->usec_timeout)
1002 		r = -ETIMEDOUT;
1003 
1004 error_free_scratch:
1005 	amdgpu_gfx_scratch_free(adev, scratch);
1006 	return r;
1007 }
1008 
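/*
 * Indirect-buffer test: build a tiny IB that uses WRITE_DATA to store
 * 0xDEADBEEF into a writeback slot, submit it, wait for its fence and
 * verify that the value landed in memory.
 */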
1009 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1010 {
1011 	struct amdgpu_device *adev = ring->adev;
1012 	struct amdgpu_ib ib;
1013 	struct dma_fence *f = NULL;
1014 
1015 	unsigned index;
1016 	uint64_t gpu_addr;
1017 	uint32_t tmp;
1018 	long r;
1019 
1020 	r = amdgpu_device_wb_get(adev, &index);
1021 	if (r)
1022 		return r;
1023 
1024 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1025 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1026 	memset(&ib, 0, sizeof(ib));
1027 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
1028 	if (r)
1029 		goto err1;
1030 
1031 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1032 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1033 	ib.ptr[2] = lower_32_bits(gpu_addr);
1034 	ib.ptr[3] = upper_32_bits(gpu_addr);
1035 	ib.ptr[4] = 0xDEADBEEF;
1036 	ib.length_dw = 5;
1037 
1038 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1039 	if (r)
1040 		goto err2;
1041 
1042 	r = dma_fence_wait_timeout(f, false, timeout);
1043 	if (r == 0) {
1044 		r = -ETIMEDOUT;
1045 		goto err2;
1046 	} else if (r < 0) {
1047 		goto err2;
1048 	}
1049 
1050 	tmp = adev->wb.wb[index];
1051 	if (tmp == 0xDEADBEEF)
1052 		r = 0;
1053 	else
1054 		r = -EINVAL;
1055 
1056 err2:
1057 	amdgpu_ib_free(adev, &ib, NULL);
1058 	dma_fence_put(f);
1059 err1:
1060 	amdgpu_device_wb_free(adev, index);
1061 	return r;
1062 }
1063 
1064 
1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1066 {
1067 	release_firmware(adev->gfx.pfp_fw);
1068 	adev->gfx.pfp_fw = NULL;
1069 	release_firmware(adev->gfx.me_fw);
1070 	adev->gfx.me_fw = NULL;
1071 	release_firmware(adev->gfx.ce_fw);
1072 	adev->gfx.ce_fw = NULL;
1073 	release_firmware(adev->gfx.rlc_fw);
1074 	adev->gfx.rlc_fw = NULL;
1075 	release_firmware(adev->gfx.mec_fw);
1076 	adev->gfx.mec_fw = NULL;
1077 	release_firmware(adev->gfx.mec2_fw);
1078 	adev->gfx.mec2_fw = NULL;
1079 
1080 	kfree(adev->gfx.rlc.register_list_format);
1081 }
1082 
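/*
 * Parse the v2.1 RLC firmware header and record the versions, offsets and
 * sizes of the save/restore list CNTL, GPM and SRM blobs for later loading.
 */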
1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1084 {
1085 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1086 
1087 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1088 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1089 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1090 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1091 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1092 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1093 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1094 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1095 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1096 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1097 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1098 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1099 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1100 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1101 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1102 }
1103 
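/*
 * Record whether the loaded ME/MEC firmware is new enough to perform a
 * combined register write-then-wait on the driver's behalf (the
 * *_fw_write_wait flags are consulted elsewhere when emitting such
 * sequences), and warn once if the CP firmware predates the required
 * support.
 */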
1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1105 {
1106 	adev->gfx.me_fw_write_wait = false;
1107 	adev->gfx.mec_fw_write_wait = false;
1108 
1109 	if ((adev->asic_type != CHIP_ARCTURUS) &&
1110 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1111 	    (adev->gfx.mec_feature_version < 46) ||
1112 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1113 	    (adev->gfx.pfp_feature_version < 46)))
1114 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1115 
1116 	switch (adev->asic_type) {
1117 	case CHIP_VEGA10:
1118 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1119 		    (adev->gfx.me_feature_version >= 42) &&
1120 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1121 		    (adev->gfx.pfp_feature_version >= 42))
1122 			adev->gfx.me_fw_write_wait = true;
1123 
1124 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1125 		    (adev->gfx.mec_feature_version >= 42))
1126 			adev->gfx.mec_fw_write_wait = true;
1127 		break;
1128 	case CHIP_VEGA12:
1129 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1130 		    (adev->gfx.me_feature_version >= 44) &&
1131 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1132 		    (adev->gfx.pfp_feature_version >= 44))
1133 			adev->gfx.me_fw_write_wait = true;
1134 
1135 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1136 		    (adev->gfx.mec_feature_version >= 44))
1137 			adev->gfx.mec_fw_write_wait = true;
1138 		break;
1139 	case CHIP_VEGA20:
1140 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1141 		    (adev->gfx.me_feature_version >= 44) &&
1142 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1143 		    (adev->gfx.pfp_feature_version >= 44))
1144 			adev->gfx.me_fw_write_wait = true;
1145 
1146 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1147 		    (adev->gfx.mec_feature_version >= 44))
1148 			adev->gfx.mec_fw_write_wait = true;
1149 		break;
1150 	case CHIP_RAVEN:
1151 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1152 		    (adev->gfx.me_feature_version >= 42) &&
1153 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1154 		    (adev->gfx.pfp_feature_version >= 42))
1155 			adev->gfx.me_fw_write_wait = true;
1156 
1157 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1158 		    (adev->gfx.mec_feature_version >= 42))
1159 			adev->gfx.mec_fw_write_wait = true;
1160 		break;
1161 	default:
1162 		break;
1163 	}
1164 }
1165 
1166 struct amdgpu_gfxoff_quirk {
1167 	u16 chip_vendor;
1168 	u16 chip_device;
1169 	u16 subsys_vendor;
1170 	u16 subsys_device;
1171 	u8 revision;
1172 };
1173 
1174 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1175 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1176 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1177 	{ 0, 0, 0, 0, 0 },
1178 };
1179 
1180 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1181 {
1182 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1183 
1184 	while (p && p->chip_device != 0) {
1185 		if (pdev->vendor == p->chip_vendor &&
1186 		    pdev->device == p->chip_device &&
1187 		    pdev->subsystem_vendor == p->subsys_vendor &&
1188 		    pdev->subsystem_device == p->subsys_device &&
1189 		    pdev->revision == p->revision) {
1190 			return true;
1191 		}
1192 		++p;
1193 	}
1194 	return false;
1195 }
1196 
1197 static bool is_raven_kicker(struct amdgpu_device *adev)
1198 {
1199 	if (adev->pm.fw_version >= 0x41e2b)
1200 		return true;
1201 	else
1202 		return false;
1203 }
1204 
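/*
 * Disable GFXOFF on quirked boards and on early Raven parts whose RLC
 * firmware lacks support for it; where GFXOFF remains enabled on
 * Raven/Renoir, turn on the matching GFX powergating flags.
 */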
1205 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1206 {
1207 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1208 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1209 
1210 	switch (adev->asic_type) {
1211 	case CHIP_VEGA10:
1212 	case CHIP_VEGA12:
1213 	case CHIP_VEGA20:
1214 		break;
1215 	case CHIP_RAVEN:
1216 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1217 		    ((!is_raven_kicker(adev) &&
1218 		      adev->gfx.rlc_fw_version < 531) ||
1219 		     (adev->gfx.rlc_feature_version < 1) ||
1220 		     !adev->gfx.rlc.is_rlc_v2_1))
1221 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1222 
1223 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1224 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1225 				AMD_PG_SUPPORT_CP |
1226 				AMD_PG_SUPPORT_RLC_SMU_HS;
1227 		break;
1228 	case CHIP_RENOIR:
1229 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1230 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1231 				AMD_PG_SUPPORT_CP |
1232 				AMD_PG_SUPPORT_RLC_SMU_HS;
1233 		break;
1234 	default:
1235 		break;
1236 	}
1237 }
1238 
1239 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1240 					  const char *chip_name)
1241 {
1242 	char fw_name[30];
1243 	int err;
1244 	struct amdgpu_firmware_info *info = NULL;
1245 	const struct common_firmware_header *header = NULL;
1246 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1247 
1248 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1249 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1250 	if (err)
1251 		goto out;
1252 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1253 	if (err)
1254 		goto out;
1255 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1256 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1257 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1258 
1259 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1260 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1261 	if (err)
1262 		goto out;
1263 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1264 	if (err)
1265 		goto out;
1266 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1267 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1268 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1269 
1270 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1271 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1272 	if (err)
1273 		goto out;
1274 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1275 	if (err)
1276 		goto out;
1277 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1278 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1279 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1280 
1281 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1282 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1283 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1284 		info->fw = adev->gfx.pfp_fw;
1285 		header = (const struct common_firmware_header *)info->fw->data;
1286 		adev->firmware.fw_size +=
1287 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1288 
1289 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1290 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1291 		info->fw = adev->gfx.me_fw;
1292 		header = (const struct common_firmware_header *)info->fw->data;
1293 		adev->firmware.fw_size +=
1294 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1295 
1296 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1297 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1298 		info->fw = adev->gfx.ce_fw;
1299 		header = (const struct common_firmware_header *)info->fw->data;
1300 		adev->firmware.fw_size +=
1301 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1302 	}
1303 
1304 out:
1305 	if (err) {
1306 		dev_err(adev->dev,
1307 			"gfx9: Failed to load firmware \"%s\"\n",
1308 			fw_name);
1309 		release_firmware(adev->gfx.pfp_fw);
1310 		adev->gfx.pfp_fw = NULL;
1311 		release_firmware(adev->gfx.me_fw);
1312 		adev->gfx.me_fw = NULL;
1313 		release_firmware(adev->gfx.ce_fw);
1314 		adev->gfx.ce_fw = NULL;
1315 	}
1316 	return err;
1317 }
1318 
1319 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1320 					  const char *chip_name)
1321 {
1322 	char fw_name[30];
1323 	int err;
1324 	struct amdgpu_firmware_info *info = NULL;
1325 	const struct common_firmware_header *header = NULL;
1326 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1327 	unsigned int *tmp = NULL;
1328 	unsigned int i = 0;
1329 	uint16_t version_major;
1330 	uint16_t version_minor;
1331 	uint32_t smu_version;
1332 
1333 	/*
1334 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1335 	 * instead of picasso_rlc.bin.
1336 	 * How to tell them apart:
1337 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1338 	 *          or revision >= 0xD8 && revision <= 0xDF
1339 	 * otherwise it is PCO FP5
1340 	 */
1341 	if (!strcmp(chip_name, "picasso") &&
1342 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1343 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1344 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1345 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1346 		(smu_version >= 0x41e2b))
1347 		/*
1348 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1349 		 */
1350 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1351 	else
1352 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1353 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1354 	if (err)
1355 		goto out;
1356 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1357 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1358 
1359 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1360 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1361 	if (version_major == 2 && version_minor == 1)
1362 		adev->gfx.rlc.is_rlc_v2_1 = true;
1363 
1364 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1365 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1366 	adev->gfx.rlc.save_and_restore_offset =
1367 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1368 	adev->gfx.rlc.clear_state_descriptor_offset =
1369 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1370 	adev->gfx.rlc.avail_scratch_ram_locations =
1371 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1372 	adev->gfx.rlc.reg_restore_list_size =
1373 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1374 	adev->gfx.rlc.reg_list_format_start =
1375 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1376 	adev->gfx.rlc.reg_list_format_separate_start =
1377 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1378 	adev->gfx.rlc.starting_offsets_start =
1379 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1380 	adev->gfx.rlc.reg_list_format_size_bytes =
1381 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1382 	adev->gfx.rlc.reg_list_size_bytes =
1383 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1384 	adev->gfx.rlc.register_list_format =
1385 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1386 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1387 	if (!adev->gfx.rlc.register_list_format) {
1388 		err = -ENOMEM;
1389 		goto out;
1390 	}
1391 
1392 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1393 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1394 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1395 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1396 
1397 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1398 
1399 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1400 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1401 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1402 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1403 
1404 	if (adev->gfx.rlc.is_rlc_v2_1)
1405 		gfx_v9_0_init_rlc_ext_microcode(adev);
1406 
1407 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1408 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1409 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1410 		info->fw = adev->gfx.rlc_fw;
1411 		header = (const struct common_firmware_header *)info->fw->data;
1412 		adev->firmware.fw_size +=
1413 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1414 
1415 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1416 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1417 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1418 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1419 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1420 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1421 			info->fw = adev->gfx.rlc_fw;
1422 			adev->firmware.fw_size +=
1423 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1424 
1425 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1426 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1427 			info->fw = adev->gfx.rlc_fw;
1428 			adev->firmware.fw_size +=
1429 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1430 
1431 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1432 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1433 			info->fw = adev->gfx.rlc_fw;
1434 			adev->firmware.fw_size +=
1435 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1436 		}
1437 	}
1438 
1439 out:
1440 	if (err) {
1441 		dev_err(adev->dev,
1442 			"gfx9: Failed to load firmware \"%s\"\n",
1443 			fw_name);
1444 		release_firmware(adev->gfx.rlc_fw);
1445 		adev->gfx.rlc_fw = NULL;
1446 	}
1447 	return err;
1448 }
1449 
1450 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1451 					  const char *chip_name)
1452 {
1453 	char fw_name[30];
1454 	int err;
1455 	struct amdgpu_firmware_info *info = NULL;
1456 	const struct common_firmware_header *header = NULL;
1457 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1458 
1459 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1460 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1461 	if (err)
1462 		goto out;
1463 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1464 	if (err)
1465 		goto out;
1466 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1467 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1468 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1469 
1470 
1471 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1472 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1473 	if (!err) {
1474 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1475 		if (err)
1476 			goto out;
1477 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1478 			adev->gfx.mec2_fw->data;
1479 		adev->gfx.mec2_fw_version =
1480 			le32_to_cpu(cp_hdr->header.ucode_version);
1481 		adev->gfx.mec2_feature_version =
1482 			le32_to_cpu(cp_hdr->ucode_feature_version);
1483 	} else {
1484 		err = 0;
1485 		adev->gfx.mec2_fw = NULL;
1486 	}
1487 
1488 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1489 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1490 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1491 		info->fw = adev->gfx.mec_fw;
1492 		header = (const struct common_firmware_header *)info->fw->data;
1493 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1494 		adev->firmware.fw_size +=
1495 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1496 
1497 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1498 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1499 		info->fw = adev->gfx.mec_fw;
1500 		adev->firmware.fw_size +=
1501 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1502 
1503 		if (adev->gfx.mec2_fw) {
1504 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1505 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1506 			info->fw = adev->gfx.mec2_fw;
1507 			header = (const struct common_firmware_header *)info->fw->data;
1508 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1509 			adev->firmware.fw_size +=
1510 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1511 
1512 			/* TODO: Determine if MEC2 JT FW loading can be removed
1513 			 * for all GFX v9 ASICs and above */
1514 			if (adev->asic_type != CHIP_ARCTURUS &&
1515 			    adev->asic_type != CHIP_RENOIR) {
1516 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1517 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1518 				info->fw = adev->gfx.mec2_fw;
1519 				adev->firmware.fw_size +=
1520 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1521 					PAGE_SIZE);
1522 			}
1523 		}
1524 	}
1525 
1526 out:
1527 	gfx_v9_0_check_if_need_gfxoff(adev);
1528 	gfx_v9_0_check_fw_write_wait(adev);
1529 	if (err) {
1530 		dev_err(adev->dev,
1531 			"gfx9: Failed to load firmware \"%s\"\n",
1532 			fw_name);
1533 		release_firmware(adev->gfx.mec_fw);
1534 		adev->gfx.mec_fw = NULL;
1535 		release_firmware(adev->gfx.mec2_fw);
1536 		adev->gfx.mec2_fw = NULL;
1537 	}
1538 	return err;
1539 }
1540 
1541 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1542 {
1543 	const char *chip_name;
1544 	int r;
1545 
1546 	DRM_DEBUG("\n");
1547 
1548 	switch (adev->asic_type) {
1549 	case CHIP_VEGA10:
1550 		chip_name = "vega10";
1551 		break;
1552 	case CHIP_VEGA12:
1553 		chip_name = "vega12";
1554 		break;
1555 	case CHIP_VEGA20:
1556 		chip_name = "vega20";
1557 		break;
1558 	case CHIP_RAVEN:
1559 		if (adev->rev_id >= 8)
1560 			chip_name = "raven2";
1561 		else if (adev->pdev->device == 0x15d8)
1562 			chip_name = "picasso";
1563 		else
1564 			chip_name = "raven";
1565 		break;
1566 	case CHIP_ARCTURUS:
1567 		chip_name = "arcturus";
1568 		break;
1569 	case CHIP_RENOIR:
1570 		chip_name = "renoir";
1571 		break;
1572 	default:
1573 		BUG();
1574 	}
1575 
1576 	/* No CPG in Arcturus */
1577 	if (adev->asic_type != CHIP_ARCTURUS) {
1578 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1579 		if (r)
1580 			return r;
1581 	}
1582 
1583 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1584 	if (r)
1585 		return r;
1586 
1587 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1588 	if (r)
1589 		return r;
1590 
1591 	return r;
1592 }
1593 
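/*
 * Size, in dwords, of the clear-state indirect buffer built from
 * gfx9_cs_data: PREAMBLE begin/end, CONTEXT_CONTROL, one SET_CONTEXT_REG
 * header pair per extent plus its registers, and the trailing CLEAR_STATE
 * packet.
 */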
1594 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1595 {
1596 	u32 count = 0;
1597 	const struct cs_section_def *sect = NULL;
1598 	const struct cs_extent_def *ext = NULL;
1599 
1600 	/* begin clear state */
1601 	count += 2;
1602 	/* context control state */
1603 	count += 3;
1604 
1605 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1606 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1607 			if (sect->id == SECT_CONTEXT)
1608 				count += 2 + ext->reg_count;
1609 			else
1610 				return 0;
1611 		}
1612 	}
1613 
1614 	/* end clear state */
1615 	count += 2;
1616 	/* clear state */
1617 	count += 2;
1618 
1619 	return count;
1620 }
1621 
1622 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1623 				    volatile u32 *buffer)
1624 {
1625 	u32 count = 0, i;
1626 	const struct cs_section_def *sect = NULL;
1627 	const struct cs_extent_def *ext = NULL;
1628 
1629 	if (adev->gfx.rlc.cs_data == NULL)
1630 		return;
1631 	if (buffer == NULL)
1632 		return;
1633 
1634 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1635 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1636 
1637 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1638 	buffer[count++] = cpu_to_le32(0x80000000);
1639 	buffer[count++] = cpu_to_le32(0x80000000);
1640 
1641 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1642 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1643 			if (sect->id == SECT_CONTEXT) {
1644 				buffer[count++] =
1645 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1646 				buffer[count++] = cpu_to_le32(ext->reg_index -
1647 						PACKET3_SET_CONTEXT_REG_START);
1648 				for (i = 0; i < ext->reg_count; i++)
1649 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1650 			} else {
1651 				return;
1652 			}
1653 		}
1654 	}
1655 
1656 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1657 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1658 
1659 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1660 	buffer[count++] = cpu_to_le32(0);
1661 }
1662 
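/*
 * Program the per-SE/SH "always on" CU masks used by RLC power gating:
 * the first pg_always_on_cu_num enabled CUs go into
 * RLC_PG_ALWAYS_ON_CU_MASK and the first always_on_cu_num enabled CUs
 * into RLC_LB_ALWAYS_ACTIVE_CU_MASK; the result is cached in
 * cu_info->ao_cu_bitmap.
 */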
1663 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1664 {
1665 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1666 	uint32_t pg_always_on_cu_num = 2;
1667 	uint32_t always_on_cu_num;
1668 	uint32_t i, j, k;
1669 	uint32_t mask, cu_bitmap, counter;
1670 
1671 	if (adev->flags & AMD_IS_APU)
1672 		always_on_cu_num = 4;
1673 	else if (adev->asic_type == CHIP_VEGA12)
1674 		always_on_cu_num = 8;
1675 	else
1676 		always_on_cu_num = 12;
1677 
1678 	mutex_lock(&adev->grbm_idx_mutex);
1679 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1680 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1681 			mask = 1;
1682 			cu_bitmap = 0;
1683 			counter = 0;
1684 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1685 
1686 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1687 				if (cu_info->bitmap[i][j] & mask) {
1688 					if (counter == pg_always_on_cu_num)
1689 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1690 					if (counter < always_on_cu_num)
1691 						cu_bitmap |= mask;
1692 					else
1693 						break;
1694 					counter++;
1695 				}
1696 				mask <<= 1;
1697 			}
1698 
1699 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1700 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1701 		}
1702 	}
1703 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1704 	mutex_unlock(&adev->grbm_idx_mutex);
1705 }
1706 
1707 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1708 {
1709 	uint32_t data;
1710 
1711 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1712 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1713 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1714 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1715 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1716 
1717 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1718 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1719 
1720 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1721 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1722 
1723 	mutex_lock(&adev->grbm_idx_mutex);
1724 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1725 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1726 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1727 
1728 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1729 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1730 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1731 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1732 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1733 
1734 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1735 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1736 	data &= 0x0000FFFF;
1737 	data |= 0x00C00000;
1738 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1739 
1740 	/*
1741 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1742 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1743 	 */
1744 
1745 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved
1746 	 * but used here for RLC_LB_CNTL configuration */
1747 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1748 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1749 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1750 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1751 	mutex_unlock(&adev->grbm_idx_mutex);
1752 
1753 	gfx_v9_0_init_always_on_cu_mask(adev);
1754 }
1755 
1756 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1757 {
1758 	uint32_t data;
1759 
1760 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1761 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1762 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1763 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1764 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1765 
1766 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1767 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1768 
1769 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1770 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1771 
1772 	mutex_lock(&adev->grbm_idx_mutex);
1773 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1774 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1775 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1776 
1777 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1778 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1779 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1780 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1781 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1782 
1783 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1784 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1785 	data &= 0x0000FFFF;
1786 	data |= 0x00C00000;
1787 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1788 
1789 	/*
1790 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1791 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1792 	 */
1793 
1794 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved
1795 	 * but used here for RLC_LB_CNTL configuration */
1796 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1797 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1798 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1799 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1800 	mutex_unlock(&adev->grbm_idx_mutex);
1801 
1802 	gfx_v9_0_init_always_on_cu_mask(adev);
1803 }
1804 
1805 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1806 {
1807 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1808 }
1809 
1810 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1811 {
1812 	return 5;
1813 }
1814 
1815 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1816 {
1817 	const struct cs_section_def *cs_data;
1818 	int r;
1819 
1820 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1821 
1822 	cs_data = adev->gfx.rlc.cs_data;
1823 
1824 	if (cs_data) {
1825 		/* init clear state block */
1826 		r = amdgpu_gfx_rlc_init_csb(adev);
1827 		if (r)
1828 			return r;
1829 	}
1830 
1831 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1832 		/* TODO: double check the cp_table_size for RV */
1833 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1834 		r = amdgpu_gfx_rlc_init_cpt(adev);
1835 		if (r)
1836 			return r;
1837 	}
1838 
1839 	switch (adev->asic_type) {
1840 	case CHIP_RAVEN:
1841 		gfx_v9_0_init_lbpw(adev);
1842 		break;
1843 	case CHIP_VEGA20:
1844 		gfx_v9_4_init_lbpw(adev);
1845 		break;
1846 	default:
1847 		break;
1848 	}
1849 
1850 	/* init spm vmid with 0xf */
1851 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1852 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1853 
1854 	return 0;
1855 }
1856 
1857 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1858 {
1859 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1860 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1861 }
1862 
1863 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1864 {
1865 	int r;
1866 	u32 *hpd;
1867 	const __le32 *fw_data;
1868 	unsigned fw_size;
1869 	u32 *fw;
1870 	size_t mec_hpd_size;
1871 
1872 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1873 
1874 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1875 
1876 	/* take ownership of the relevant compute queues */
1877 	amdgpu_gfx_compute_queue_acquire(adev);
1878 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1879 
1880 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1881 				      AMDGPU_GEM_DOMAIN_VRAM,
1882 				      &adev->gfx.mec.hpd_eop_obj,
1883 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1884 				      (void **)&hpd);
1885 	if (r) {
1886 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1887 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1888 		return r;
1889 	}
1890 
1891 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1892 
1893 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1894 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1895 
1896 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1897 
1898 	fw_data = (const __le32 *)
1899 		(adev->gfx.mec_fw->data +
1900 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1901 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1902 
1903 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1904 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1905 				      &adev->gfx.mec.mec_fw_obj,
1906 				      &adev->gfx.mec.mec_fw_gpu_addr,
1907 				      (void **)&fw);
1908 	if (r) {
1909 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1910 		gfx_v9_0_mec_fini(adev);
1911 		return r;
1912 	}
1913 
1914 	memcpy(fw, fw_data, fw_size);
1915 
1916 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1917 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1918 
1919 	return 0;
1920 }
1921 
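/*
 * Read wave state through the SQ indirect register interface: program
 * SQ_IND_INDEX with the target SIMD/wave/register (optionally with
 * auto-increment for bulk SGPR/VGPR reads) and read back SQ_IND_DATA.
 * Exposed through the read_wave_* gfx_funcs callbacks below.
 */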
1922 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1923 {
1924 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1925 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1926 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1927 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1928 		(SQ_IND_INDEX__FORCE_READ_MASK));
1929 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1930 }
1931 
1932 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1933 			   uint32_t wave, uint32_t thread,
1934 			   uint32_t regno, uint32_t num, uint32_t *out)
1935 {
1936 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1937 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1938 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1939 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1940 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1941 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1942 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1943 	while (num--)
1944 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1945 }
1946 
1947 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1948 {
1949 	/* type 1 wave data */
1950 	dst[(*no_fields)++] = 1;
1951 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1952 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1953 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1954 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1955 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1956 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1957 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1958 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1959 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1960 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1961 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1962 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1963 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1964 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1965 }
1966 
1967 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1968 				     uint32_t wave, uint32_t start,
1969 				     uint32_t size, uint32_t *dst)
1970 {
1971 	wave_read_regs(
1972 		adev, simd, wave, 0,
1973 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1974 }
1975 
1976 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1977 				     uint32_t wave, uint32_t thread,
1978 				     uint32_t start, uint32_t size,
1979 				     uint32_t *dst)
1980 {
1981 	wave_read_regs(
1982 		adev, simd, wave, thread,
1983 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1984 }
1985 
1986 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1987 				  u32 me, u32 pipe, u32 q, u32 vm)
1988 {
1989 	soc15_grbm_select(adev, me, pipe, q, vm);
1990 }
1991 
1992 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1993 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1994 	.select_se_sh = &gfx_v9_0_select_se_sh,
1995 	.read_wave_data = &gfx_v9_0_read_wave_data,
1996 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1997 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1998 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1999 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2000 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003 
2004 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2005 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2006 	.select_se_sh = &gfx_v9_0_select_se_sh,
2007 	.read_wave_data = &gfx_v9_0_read_wave_data,
2008 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2009 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2010 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2011 	.ras_error_inject = &gfx_v9_4_ras_error_inject,
2012 	.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2013 	.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2014 };
2015 
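/*
 * Select the gfx_funcs table and the per-ASIC golden GB_ADDR_CONFIG value
 * (or read and patch the register), then decode its fields into
 * adev->gfx.config for later use.
 */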
2016 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2017 {
2018 	u32 gb_addr_config;
2019 	int err;
2020 
2021 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2022 
2023 	switch (adev->asic_type) {
2024 	case CHIP_VEGA10:
2025 		adev->gfx.config.max_hw_contexts = 8;
2026 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2031 		break;
2032 	case CHIP_VEGA12:
2033 		adev->gfx.config.max_hw_contexts = 8;
2034 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2035 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2036 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2037 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2038 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2039 		DRM_INFO("fix gfx.config for vega12\n");
2040 		break;
2041 	case CHIP_VEGA20:
2042 		adev->gfx.config.max_hw_contexts = 8;
2043 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2044 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2045 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2046 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2047 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2048 		gb_addr_config &= ~0xf3e777ff;
2049 		gb_addr_config |= 0x22014042;
2050 		/* check vbios table if gpu info is not available */
2051 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2052 		if (err)
2053 			return err;
2054 		break;
2055 	case CHIP_RAVEN:
2056 		adev->gfx.config.max_hw_contexts = 8;
2057 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2058 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2059 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2060 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2061 		if (adev->rev_id >= 8)
2062 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2063 		else
2064 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2065 		break;
2066 	case CHIP_ARCTURUS:
2067 		adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2068 		adev->gfx.config.max_hw_contexts = 8;
2069 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2070 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2071 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2072 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2073 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2074 		gb_addr_config &= ~0xf3e777ff;
2075 		gb_addr_config |= 0x22014042;
2076 		break;
2077 	case CHIP_RENOIR:
2078 		adev->gfx.config.max_hw_contexts = 8;
2079 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2080 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2081 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2082 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2083 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2084 		gb_addr_config &= ~0xf3e777ff;
2085 		gb_addr_config |= 0x22010042;
2086 		break;
2087 	default:
2088 		BUG();
2089 		break;
2090 	}
2091 
2092 	adev->gfx.config.gb_addr_config = gb_addr_config;
2093 
2094 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2095 			REG_GET_FIELD(
2096 					adev->gfx.config.gb_addr_config,
2097 					GB_ADDR_CONFIG,
2098 					NUM_PIPES);
2099 
2100 	adev->gfx.config.max_tile_pipes =
2101 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2102 
2103 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2104 			REG_GET_FIELD(
2105 					adev->gfx.config.gb_addr_config,
2106 					GB_ADDR_CONFIG,
2107 					NUM_BANKS);
2108 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2109 			REG_GET_FIELD(
2110 					adev->gfx.config.gb_addr_config,
2111 					GB_ADDR_CONFIG,
2112 					MAX_COMPRESSED_FRAGS);
2113 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2114 			REG_GET_FIELD(
2115 					adev->gfx.config.gb_addr_config,
2116 					GB_ADDR_CONFIG,
2117 					NUM_RB_PER_SE);
2118 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2119 			REG_GET_FIELD(
2120 					adev->gfx.config.gb_addr_config,
2121 					GB_ADDR_CONFIG,
2122 					NUM_SHADER_ENGINES);
2123 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2124 			REG_GET_FIELD(
2125 					adev->gfx.config.gb_addr_config,
2126 					GB_ADDR_CONFIG,
2127 					PIPE_INTERLEAVE_SIZE));
2128 
2129 	return 0;
2130 }
2131 
2132 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2133 				      int mec, int pipe, int queue)
2134 {
2135 	int r;
2136 	unsigned irq_type;
2137 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2138 
2141 	/* mec0 is me1 */
2142 	ring->me = mec + 1;
2143 	ring->pipe = pipe;
2144 	ring->queue = queue;
2145 
2146 	ring->ring_obj = NULL;
2147 	ring->use_doorbell = true;
2148 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2149 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2150 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2151 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2152 
2153 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2154 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2155 		+ ring->pipe;
2156 
2157 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2158 	r = amdgpu_ring_init(adev, ring, 1024,
2159 			     &adev->gfx.eop_irq, irq_type);
2160 	if (r)
2161 		return r;
2162 
2163 
2164 	return 0;
2165 }
2166 
2167 static int gfx_v9_0_sw_init(void *handle)
2168 {
2169 	int i, j, k, r, ring_id;
2170 	struct amdgpu_ring *ring;
2171 	struct amdgpu_kiq *kiq;
2172 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2173 
2174 	switch (adev->asic_type) {
2175 	case CHIP_VEGA10:
2176 	case CHIP_VEGA12:
2177 	case CHIP_VEGA20:
2178 	case CHIP_RAVEN:
2179 	case CHIP_ARCTURUS:
2180 	case CHIP_RENOIR:
2181 		adev->gfx.mec.num_mec = 2;
2182 		break;
2183 	default:
2184 		adev->gfx.mec.num_mec = 1;
2185 		break;
2186 	}
2187 
2188 	adev->gfx.mec.num_pipe_per_mec = 4;
2189 	adev->gfx.mec.num_queue_per_pipe = 8;
2190 
2191 	/* EOP Event */
2192 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2193 	if (r)
2194 		return r;
2195 
2196 	/* Privileged reg */
2197 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2198 			      &adev->gfx.priv_reg_irq);
2199 	if (r)
2200 		return r;
2201 
2202 	/* Privileged inst */
2203 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2204 			      &adev->gfx.priv_inst_irq);
2205 	if (r)
2206 		return r;
2207 
2208 	/* ECC error */
2209 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2210 			      &adev->gfx.cp_ecc_error_irq);
2211 	if (r)
2212 		return r;
2213 
2214 	/* FUE error */
2215 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2216 			      &adev->gfx.cp_ecc_error_irq);
2217 	if (r)
2218 		return r;
2219 
2220 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2221 
2222 	gfx_v9_0_scratch_init(adev);
2223 
2224 	r = gfx_v9_0_init_microcode(adev);
2225 	if (r) {
2226 		DRM_ERROR("Failed to load gfx firmware!\n");
2227 		return r;
2228 	}
2229 
2230 	r = adev->gfx.rlc.funcs->init(adev);
2231 	if (r) {
2232 		DRM_ERROR("Failed to init rlc BOs!\n");
2233 		return r;
2234 	}
2235 
2236 	r = gfx_v9_0_mec_init(adev);
2237 	if (r) {
2238 		DRM_ERROR("Failed to init MEC BOs!\n");
2239 		return r;
2240 	}
2241 
2242 	/* set up the gfx ring */
2243 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2244 		ring = &adev->gfx.gfx_ring[i];
2245 		ring->ring_obj = NULL;
2246 		if (!i)
2247 			sprintf(ring->name, "gfx");
2248 		else
2249 			sprintf(ring->name, "gfx_%d", i);
2250 		ring->use_doorbell = true;
2251 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2252 		r = amdgpu_ring_init(adev, ring, 1024,
2253 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2254 		if (r)
2255 			return r;
2256 	}
2257 
2258 	/* set up the compute queues - allocate horizontally across pipes */
2259 	ring_id = 0;
2260 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2261 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2262 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2263 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2264 					continue;
2265 
2266 				r = gfx_v9_0_compute_ring_init(adev,
2267 							       ring_id,
2268 							       i, k, j);
2269 				if (r)
2270 					return r;
2271 
2272 				ring_id++;
2273 			}
2274 		}
2275 	}
2276 
2277 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2278 	if (r) {
2279 		DRM_ERROR("Failed to init KIQ BOs!\n");
2280 		return r;
2281 	}
2282 
2283 	kiq = &adev->gfx.kiq;
2284 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2285 	if (r)
2286 		return r;
2287 
2288 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2289 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2290 	if (r)
2291 		return r;
2292 
2293 	adev->gfx.ce_ram_size = 0x8000;
2294 
2295 	r = gfx_v9_0_gpu_early_init(adev);
2296 	if (r)
2297 		return r;
2298 
2299 	return 0;
2300 }
2301 
2302 
2303 static int gfx_v9_0_sw_fini(void *handle)
2304 {
2305 	int i;
2306 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2307 
2308 	amdgpu_gfx_ras_fini(adev);
2309 
2310 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2311 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2312 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2313 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2314 
2315 	amdgpu_gfx_mqd_sw_fini(adev);
2316 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2317 	amdgpu_gfx_kiq_fini(adev);
2318 
2319 	gfx_v9_0_mec_fini(adev);
2320 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2321 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2322 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2323 				&adev->gfx.rlc.cp_table_gpu_addr,
2324 				(void **)&adev->gfx.rlc.cp_table_ptr);
2325 	}
2326 	gfx_v9_0_free_microcode(adev);
2327 
2328 	return 0;
2329 }
2330 
2331 
2332 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2333 {
2334 	/* TODO */
2335 }
2336 
2337 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2338 {
2339 	u32 data;
2340 
2341 	if (instance == 0xffffffff)
2342 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2343 	else
2344 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2345 
2346 	if (se_num == 0xffffffff)
2347 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2348 	else
2349 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2350 
2351 	if (sh_num == 0xffffffff)
2352 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2353 	else
2354 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2355 
2356 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2357 }
2358 
2359 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2360 {
2361 	u32 data, mask;
2362 
2363 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2364 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2365 
2366 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2367 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2368 
2369 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2370 					 adev->gfx.config.max_sh_per_se);
2371 
2372 	return (~data) & mask;
2373 }
2374 
2375 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2376 {
2377 	int i, j;
2378 	u32 data;
2379 	u32 active_rbs = 0;
2380 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2381 					adev->gfx.config.max_sh_per_se;
2382 
2383 	mutex_lock(&adev->grbm_idx_mutex);
2384 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2385 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2386 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2387 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2388 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2389 					       rb_bitmap_width_per_sh);
2390 		}
2391 	}
2392 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2393 	mutex_unlock(&adev->grbm_idx_mutex);
2394 
2395 	adev->gfx.config.backend_enable_mask = active_rbs;
2396 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2397 }
2398 
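/* VMIDs 8..15 are reserved for compute; give them the fixed SH_MEM aperture bases below. */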
2399 #define DEFAULT_SH_MEM_BASES	(0x6000)
2400 #define FIRST_COMPUTE_VMID	(8)
2401 #define LAST_COMPUTE_VMID	(16)
2402 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2403 {
2404 	int i;
2405 	uint32_t sh_mem_config;
2406 	uint32_t sh_mem_bases;
2407 
2408 	/*
2409 	 * Configure apertures:
2410 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2411 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2412 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2413 	 */
2414 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2415 
2416 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2417 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2418 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2419 
2420 	mutex_lock(&adev->srbm_mutex);
2421 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2422 		soc15_grbm_select(adev, 0, 0, 0, i);
2423 		/* CP and shaders */
2424 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2425 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2426 	}
2427 	soc15_grbm_select(adev, 0, 0, 0, 0);
2428 	mutex_unlock(&adev->srbm_mutex);
2429 
2430 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2431 	 * access. These should be enabled by FW for target VMIDs. */
2432 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2433 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2434 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2435 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2436 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2437 	}
2438 }
2439 
2440 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2441 {
2442 	int vmid;
2443 
2444 	/*
2445 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2446 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2447 	 * the driver can enable them for graphics. VMID0 should maintain
2448 	 * access so that HWS firmware can save/restore entries.
2449 	 */
2450 	for (vmid = 1; vmid < 16; vmid++) {
2451 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2452 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2453 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2454 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2455 	}
2456 }
2457 
2458 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2459 {
2460 	uint32_t tmp;
2461 
2462 	switch (adev->asic_type) {
2463 	case CHIP_ARCTURUS:
2464 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2465 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2466 					DISABLE_BARRIER_WAITCNT, 1);
2467 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2468 		break;
2469 	default:
2470 		break;
2471 	}
2472 }
2473 
2474 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2475 {
2476 	u32 tmp;
2477 	int i;
2478 
2479 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2480 
2481 	gfx_v9_0_tiling_mode_table_init(adev);
2482 
2483 	gfx_v9_0_setup_rb(adev);
2484 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2485 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2486 
2487 	/* XXX SH_MEM regs */
2488 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2489 	mutex_lock(&adev->srbm_mutex);
2490 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2491 		soc15_grbm_select(adev, 0, 0, 0, i);
2492 		/* CP and shaders */
2493 		if (i == 0) {
2494 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2495 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2496 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2497 					    !!amdgpu_noretry);
2498 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2499 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2500 		} else {
2501 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2502 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2503 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2504 					    !!amdgpu_noretry);
2505 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2506 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2507 				(adev->gmc.private_aperture_start >> 48));
2508 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2509 				(adev->gmc.shared_aperture_start >> 48));
2510 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2511 		}
2512 	}
2513 	soc15_grbm_select(adev, 0, 0, 0, 0);
2514 
2515 	mutex_unlock(&adev->srbm_mutex);
2516 
2517 	gfx_v9_0_init_compute_vmid(adev);
2518 	gfx_v9_0_init_gds_vmid(adev);
2519 	gfx_v9_0_init_sq_config(adev);
2520 }
2521 
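/*
 * Wait for the RLC serdes to go idle: poll RLC_SERDES_CU_MASTER_BUSY on
 * every SE/SH, then the non-CU (SE/GC/TC) master busy bits, giving up
 * after adev->usec_timeout microseconds per poll loop.
 */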
2522 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2523 {
2524 	u32 i, j, k;
2525 	u32 mask;
2526 
2527 	mutex_lock(&adev->grbm_idx_mutex);
2528 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2529 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2530 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2531 			for (k = 0; k < adev->usec_timeout; k++) {
2532 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2533 					break;
2534 				udelay(1);
2535 			}
2536 			if (k == adev->usec_timeout) {
2537 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2538 						      0xffffffff, 0xffffffff);
2539 				mutex_unlock(&adev->grbm_idx_mutex);
2540 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2541 					 i, j);
2542 				return;
2543 			}
2544 		}
2545 	}
2546 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2547 	mutex_unlock(&adev->grbm_idx_mutex);
2548 
2549 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2550 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2551 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2552 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2553 	for (k = 0; k < adev->usec_timeout; k++) {
2554 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2555 			break;
2556 		udelay(1);
2557 	}
2558 }
2559 
2560 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2561 					       bool enable)
2562 {
2563 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2564 
2565 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2566 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2567 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2568 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2569 
2570 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2571 }
2572 
2573 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2574 {
2575 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2576 	/* csib */
2577 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2578 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2579 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2580 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2581 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2582 			adev->gfx.rlc.clear_state_size);
2583 }
2584 
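/*
 * Walk the indirect portion of the RLC register-list-format blob: record the
 * starting offset of each indirect block and collect the unique indirect
 * register offsets referenced there. Each block appears to end with a
 * 0xFFFFFFFF marker, which is how the loop below detects block boundaries.
 */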
2585 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2586 				int indirect_offset,
2587 				int list_size,
2588 				int *unique_indirect_regs,
2589 				int unique_indirect_reg_count,
2590 				int *indirect_start_offsets,
2591 				int *indirect_start_offsets_count,
2592 				int max_start_offsets_count)
2593 {
2594 	int idx;
2595 
2596 	for (; indirect_offset < list_size; indirect_offset++) {
2597 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2598 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2599 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2600 
2601 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2602 			indirect_offset += 2;
2603 
2604 			/* look for the matching index */
2605 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2606 				if (unique_indirect_regs[idx] ==
2607 					register_list_format[indirect_offset] ||
2608 					!unique_indirect_regs[idx])
2609 					break;
2610 			}
2611 
2612 			BUG_ON(idx >= unique_indirect_reg_count);
2613 
2614 			if (!unique_indirect_regs[idx])
2615 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2616 
2617 			indirect_offset++;
2618 		}
2619 	}
2620 }
2621 
2622 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2623 {
2624 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2625 	int unique_indirect_reg_count = 0;
2626 
2627 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2628 	int indirect_start_offsets_count = 0;
2629 
2630 	int list_size = 0;
2631 	int i = 0, j = 0;
2632 	u32 tmp = 0;
2633 
2634 	u32 *register_list_format =
2635 		kmemdup(adev->gfx.rlc.register_list_format,
2636 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2637 	if (!register_list_format)
2638 		return -ENOMEM;
2639 
2640 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2641 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2642 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2643 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2644 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2645 				    unique_indirect_regs,
2646 				    unique_indirect_reg_count,
2647 				    indirect_start_offsets,
2648 				    &indirect_start_offsets_count,
2649 				    ARRAY_SIZE(indirect_start_offsets));
2650 
2651 	/* enable auto inc in case it is disabled */
2652 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2653 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2654 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2655 
2656 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2657 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2658 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2659 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2660 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2661 			adev->gfx.rlc.register_restore[i]);
2662 
2663 	/* load indirect register */
2664 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2665 		adev->gfx.rlc.reg_list_format_start);
2666 
2667 	/* direct register portion */
2668 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2669 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2670 			register_list_format[i]);
2671 
2672 	/* indirect register portion */
2673 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2674 		if (register_list_format[i] == 0xFFFFFFFF) {
2675 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2676 			continue;
2677 		}
2678 
2679 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2680 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2681 
2682 		for (j = 0; j < unique_indirect_reg_count; j++) {
2683 			if (register_list_format[i] == unique_indirect_regs[j]) {
2684 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2685 				break;
2686 			}
2687 		}
2688 
2689 		BUG_ON(j >= unique_indirect_reg_count);
2690 
2691 		i++;
2692 	}
2693 
2694 	/* set save/restore list size */
2695 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2696 	list_size = list_size >> 1;
2697 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2698 		adev->gfx.rlc.reg_restore_list_size);
2699 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2700 
2701 	/* write the starting offsets to RLC scratch ram */
2702 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2703 		adev->gfx.rlc.starting_offsets_start);
2704 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2705 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2706 		       indirect_start_offsets[i]);
2707 
2708 	/* load unique indirect regs*/
2709 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2710 		if (unique_indirect_regs[i] != 0) {
2711 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2712 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2713 			       unique_indirect_regs[i] & 0x3FFFF);
2714 
2715 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2716 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2717 			       unique_indirect_regs[i] >> 20);
2718 		}
2719 	}
2720 
2721 	kfree(register_list_format);
2722 	return 0;
2723 }
2724 
2725 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2726 {
2727 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2728 }
2729 
2730 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2731 					     bool enable)
2732 {
2733 	uint32_t data = 0;
2734 	uint32_t default_data = 0;
2735 
2736 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2737 	if (enable) {
2738 		/* enable GFXIP control over CGPG */
2739 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2740 		if (default_data != data)
2741 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2742 
2743 		/* update status */
2744 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2745 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2746 		if (default_data != data)
2747 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2748 	} else {
2749 		/* restore GFXIP control over CGPG */
2750 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2751 		if (default_data != data)
2752 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2753 	}
2754 }
2755 
2756 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2757 {
2758 	uint32_t data = 0;
2759 
2760 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2761 			      AMD_PG_SUPPORT_GFX_SMG |
2762 			      AMD_PG_SUPPORT_GFX_DMG)) {
2763 		/* init IDLE_POLL_COUNT = 60 */
2764 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2765 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2766 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2767 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2768 
2769 		/* init RLC PG Delay */
2770 		data = 0;
2771 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2772 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2773 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2774 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2775 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2776 
2777 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2778 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2779 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2780 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2781 
2782 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2783 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2784 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2785 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2786 
2787 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2788 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2789 
2790 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2791 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2792 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2793 
2794 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2795 	}
2796 }
2797 
2798 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2799 						bool enable)
2800 {
2801 	uint32_t data = 0;
2802 	uint32_t default_data = 0;
2803 
2804 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2805 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2806 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2807 			     enable ? 1 : 0);
2808 	if (default_data != data)
2809 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2810 }
2811 
2812 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2813 						bool enable)
2814 {
2815 	uint32_t data = 0;
2816 	uint32_t default_data = 0;
2817 
2818 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2819 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2820 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2821 			     enable ? 1 : 0);
2822 	if (default_data != data)
2823 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2824 }
2825 
2826 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2827 					bool enable)
2828 {
2829 	uint32_t data = 0;
2830 	uint32_t default_data = 0;
2831 
2832 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2833 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2834 			     CP_PG_DISABLE,
2835 			     enable ? 0 : 1);
2836 	if (default_data != data)
2837 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2838 }
2839 
2840 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2841 						bool enable)
2842 {
2843 	uint32_t data, default_data;
2844 
2845 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2846 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2847 			     GFX_POWER_GATING_ENABLE,
2848 			     enable ? 1 : 0);
2849 	if (default_data != data)
2850 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2851 }
2852 
2853 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2854 						bool enable)
2855 {
2856 	uint32_t data, default_data;
2857 
2858 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2859 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2860 			     GFX_PIPELINE_PG_ENABLE,
2861 			     enable ? 1 : 0);
2862 	if (default_data != data)
2863 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2864 
2865 	if (!enable)
2866 		/* read any GFX register to wake up GFX */
2867 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2868 }
2869 
2870 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2871 						       bool enable)
2872 {
2873 	uint32_t data, default_data;
2874 
2875 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2876 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2877 			     STATIC_PER_CU_PG_ENABLE,
2878 			     enable ? 1 : 0);
2879 	if (default_data != data)
2880 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2881 }
2882 
2883 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2884 						bool enable)
2885 {
2886 	uint32_t data, default_data;
2887 
2888 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890 			     DYN_PER_CU_PG_ENABLE,
2891 			     enable ? 1 : 0);
2892 	if (default_data != data)
2893 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2894 }
2895 
2896 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2897 {
2898 	gfx_v9_0_init_csb(adev);
2899 
2900 	/*
2901 	 * The RLC save/restore list is supported since RLC v2_1
2902 	 * and is needed by the gfxoff feature.
2903 	 */
2904 	if (adev->gfx.rlc.is_rlc_v2_1) {
2905 		if (adev->asic_type == CHIP_VEGA12 ||
2906 		    (adev->asic_type == CHIP_RAVEN &&
2907 		     adev->rev_id >= 8))
2908 			gfx_v9_1_init_rlc_save_restore_list(adev);
2909 		gfx_v9_0_enable_save_restore_machine(adev);
2910 	}
2911 
2912 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2913 			      AMD_PG_SUPPORT_GFX_SMG |
2914 			      AMD_PG_SUPPORT_GFX_DMG |
2915 			      AMD_PG_SUPPORT_CP |
2916 			      AMD_PG_SUPPORT_GDS |
2917 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2918 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2919 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2920 		gfx_v9_0_init_gfx_power_gating(adev);
2921 	}
2922 }
2923 
2924 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2925 {
2926 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2927 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2928 	gfx_v9_0_wait_for_rlc_serdes(adev);
2929 }
2930 
2931 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2932 {
2933 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2934 	udelay(50);
2935 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2936 	udelay(50);
2937 }
2938 
2939 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2940 {
2941 #ifdef AMDGPU_RLC_DEBUG_RETRY
2942 	u32 rlc_ucode_ver;
2943 #endif
2944 
2945 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2946 	udelay(50);
2947 
2948 	/* carrizo: enable the CP interrupt only after the CP has been initialized */
2949 	if (!(adev->flags & AMD_IS_APU)) {
2950 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2951 		udelay(50);
2952 	}
2953 
2954 #ifdef AMDGPU_RLC_DEBUG_RETRY
2955 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2956 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2957 	if (rlc_ucode_ver == 0x108) {
2958 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2959 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2960 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2961 		 * default is 0x9C4 to create a 100us interval */
2962 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2963 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2964 		 * to disable the page fault retry interrupts, default is
2965 		 * 0x100 (256) */
2966 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2967 	}
2968 #endif
2969 }
2970 
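/*
 * Legacy (non-PSP) RLC microcode load: stream the RLCG ucode image from
 * adev->gfx.rlc_fw into RLC_GPM_UCODE_DATA one dword at a time, then leave
 * the firmware version in RLC_GPM_UCODE_ADDR.
 */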
2971 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2972 {
2973 	const struct rlc_firmware_header_v2_0 *hdr;
2974 	const __le32 *fw_data;
2975 	unsigned i, fw_size;
2976 
2977 	if (!adev->gfx.rlc_fw)
2978 		return -EINVAL;
2979 
2980 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2981 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2982 
2983 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2984 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2985 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2986 
2987 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2988 			RLCG_UCODE_LOADING_START_ADDRESS);
2989 	for (i = 0; i < fw_size; i++)
2990 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2991 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2992 
2993 	return 0;
2994 }
2995 
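/*
 * Bring the RLC back up: stop it, disable CGCG/CGLS, re-initialize power
 * gating, load the microcode when PSP loading is not used, apply the LBPW
 * module option for Raven/Vega20 and restart the RLC.
 */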
2996 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2997 {
2998 	int r;
2999 
3000 	if (amdgpu_sriov_vf(adev)) {
3001 		gfx_v9_0_init_csb(adev);
3002 		return 0;
3003 	}
3004 
3005 	adev->gfx.rlc.funcs->stop(adev);
3006 
3007 	/* disable CG */
3008 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3009 
3010 	gfx_v9_0_init_pg(adev);
3011 
3012 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3013 		/* legacy rlc firmware loading */
3014 		r = gfx_v9_0_rlc_load_microcode(adev);
3015 		if (r)
3016 			return r;
3017 	}
3018 
3019 	switch (adev->asic_type) {
3020 	case CHIP_RAVEN:
3021 		if (amdgpu_lbpw == 0)
3022 			gfx_v9_0_enable_lbpw(adev, false);
3023 		else
3024 			gfx_v9_0_enable_lbpw(adev, true);
3025 		break;
3026 	case CHIP_VEGA20:
3027 		if (amdgpu_lbpw > 0)
3028 			gfx_v9_0_enable_lbpw(adev, true);
3029 		else
3030 			gfx_v9_0_enable_lbpw(adev, false);
3031 		break;
3032 	default:
3033 		break;
3034 	}
3035 
3036 	adev->gfx.rlc.funcs->start(adev);
3037 
3038 	return 0;
3039 }
3040 
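/*
 * Halt or un-halt the gfx CP micro engines (ME, PFP and CE); when halting,
 * also mark all gfx rings as not ready for the scheduler.
 */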
3041 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3042 {
3043 	int i;
3044 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3045 
3046 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3047 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3048 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3049 	if (!enable) {
3050 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3051 			adev->gfx.gfx_ring[i].sched.ready = false;
3052 	}
3053 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3054 	udelay(50);
3055 }
3056 
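/*
 * Legacy (non-PSP) load of the PFP, CE and ME microcode through the
 * respective UCODE_ADDR/UCODE_DATA (or RAM_WADDR/RAM_DATA) register pairs.
 */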
3057 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3058 {
3059 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3060 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3061 	const struct gfx_firmware_header_v1_0 *me_hdr;
3062 	const __le32 *fw_data;
3063 	unsigned i, fw_size;
3064 
3065 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3066 		return -EINVAL;
3067 
3068 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3069 		adev->gfx.pfp_fw->data;
3070 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3071 		adev->gfx.ce_fw->data;
3072 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3073 		adev->gfx.me_fw->data;
3074 
3075 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3076 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3077 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3078 
3079 	gfx_v9_0_cp_gfx_enable(adev, false);
3080 
3081 	/* PFP */
3082 	fw_data = (const __le32 *)
3083 		(adev->gfx.pfp_fw->data +
3084 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3085 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3086 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3087 	for (i = 0; i < fw_size; i++)
3088 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3089 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3090 
3091 	/* CE */
3092 	fw_data = (const __le32 *)
3093 		(adev->gfx.ce_fw->data +
3094 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3095 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3096 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3097 	for (i = 0; i < fw_size; i++)
3098 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3099 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3100 
3101 	/* ME */
3102 	fw_data = (const __le32 *)
3103 		(adev->gfx.me_fw->data +
3104 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3105 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3106 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3107 	for (i = 0; i < fw_size; i++)
3108 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3109 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3110 
3111 	return 0;
3112 }
3113 
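/*
 * Initialize the CP and emit the clear-state preamble on the gfx ring:
 * the gfx9_cs_data context registers, a CLEAR_STATE packet and the CE
 * partition base setup.
 */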
3114 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3115 {
3116 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3117 	const struct cs_section_def *sect = NULL;
3118 	const struct cs_extent_def *ext = NULL;
3119 	int r, i, tmp;
3120 
3121 	/* init the CP */
3122 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3123 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3124 
3125 	gfx_v9_0_cp_gfx_enable(adev, true);
3126 
3127 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3128 	if (r) {
3129 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3130 		return r;
3131 	}
3132 
3133 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3134 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3135 
3136 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3137 	amdgpu_ring_write(ring, 0x80000000);
3138 	amdgpu_ring_write(ring, 0x80000000);
3139 
3140 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3141 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3142 			if (sect->id == SECT_CONTEXT) {
3143 				amdgpu_ring_write(ring,
3144 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3145 					       ext->reg_count));
3146 				amdgpu_ring_write(ring,
3147 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3148 				for (i = 0; i < ext->reg_count; i++)
3149 					amdgpu_ring_write(ring, ext->extent[i]);
3150 			}
3151 		}
3152 	}
3153 
3154 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3155 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3156 
3157 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3158 	amdgpu_ring_write(ring, 0);
3159 
3160 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3161 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3162 	amdgpu_ring_write(ring, 0x8000);
3163 	amdgpu_ring_write(ring, 0x8000);
3164 
3165 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3166 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3167 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3168 	amdgpu_ring_write(ring, tmp);
3169 	amdgpu_ring_write(ring, 0);
3170 
3171 	amdgpu_ring_commit(ring);
3172 
3173 	return 0;
3174 }
3175 
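/*
 * Program the gfx ring buffer (CP_RB0) registers: buffer size, read/write
 * pointers, write-back addresses, base address and doorbell range, then
 * kick off the ring via gfx_v9_0_cp_gfx_start().
 */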
3176 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3177 {
3178 	struct amdgpu_ring *ring;
3179 	u32 tmp;
3180 	u32 rb_bufsz;
3181 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3182 
3183 	/* Set the write pointer delay */
3184 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3185 
3186 	/* set the RB to use vmid 0 */
3187 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3188 
3189 	/* Set ring buffer size */
3190 	ring = &adev->gfx.gfx_ring[0];
3191 	rb_bufsz = order_base_2(ring->ring_size / 8);
3192 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3193 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3194 #ifdef __BIG_ENDIAN
3195 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3196 #endif
3197 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3198 
3199 	/* Initialize the ring buffer's write pointers */
3200 	ring->wptr = 0;
3201 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3202 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3203 
3204 	/* set the wb address whether it's enabled or not */
3205 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3206 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3207 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3208 
3209 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3210 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3211 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3212 
3213 	mdelay(1);
3214 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3215 
3216 	rb_addr = ring->gpu_addr >> 8;
3217 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3218 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3219 
3220 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3221 	if (ring->use_doorbell) {
3222 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3223 				    DOORBELL_OFFSET, ring->doorbell_index);
3224 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3225 				    DOORBELL_EN, 1);
3226 	} else {
3227 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3228 	}
3229 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3230 
3231 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3232 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3233 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3234 
3235 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3236 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3237 
3238 
3239 	/* start the ring */
3240 	gfx_v9_0_cp_gfx_start(adev);
3241 	ring->sched.ready = true;
3242 
3243 	return 0;
3244 }
3245 
3246 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3247 {
3248 	int i;
3249 
3250 	if (enable) {
3251 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3252 	} else {
3253 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3254 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3255 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3256 			adev->gfx.compute_ring[i].sched.ready = false;
3257 		adev->gfx.kiq.ring.sched.ready = false;
3258 	}
3259 	udelay(50);
3260 }
3261 
3262 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3263 {
3264 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3265 	const __le32 *fw_data;
3266 	unsigned i;
3267 	u32 tmp;
3268 
3269 	if (!adev->gfx.mec_fw)
3270 		return -EINVAL;
3271 
3272 	gfx_v9_0_cp_compute_enable(adev, false);
3273 
3274 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3275 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3276 
3277 	fw_data = (const __le32 *)
3278 		(adev->gfx.mec_fw->data +
3279 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3280 	tmp = 0;
3281 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3282 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3283 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3284 
3285 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3286 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3287 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3288 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3289 
3290 	/* MEC1 */
3291 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3292 			 mec_hdr->jt_offset);
3293 	for (i = 0; i < mec_hdr->jt_size; i++)
3294 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3295 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3296 
3297 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3298 			adev->gfx.mec_fw_version);
3299 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3300 
3301 	return 0;
3302 }
3303 
3304 /* KIQ functions */
3305 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3306 {
3307 	uint32_t tmp;
3308 	struct amdgpu_device *adev = ring->adev;
3309 
3310 	/* tell the RLC which queue is the KIQ */
3311 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3312 	tmp &= 0xffffff00;
3313 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3314 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3315 	tmp |= 0x80;
3316 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3317 }
3318 
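/*
 * Fill the memory queue descriptor (MQD) for a compute or KIQ queue from
 * the ring state: EOP buffer, doorbell, MQD/HQD base addresses, queue size
 * and read/write pointer write-back locations.
 */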
3319 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3320 {
3321 	struct amdgpu_device *adev = ring->adev;
3322 	struct v9_mqd *mqd = ring->mqd_ptr;
3323 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3324 	uint32_t tmp;
3325 
3326 	mqd->header = 0xC0310800;
3327 	mqd->compute_pipelinestat_enable = 0x00000001;
3328 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3329 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3330 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3331 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3332 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3333 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3334 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3335 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3336 	mqd->compute_misc_reserved = 0x00000003;
3337 
3338 	mqd->dynamic_cu_mask_addr_lo =
3339 		lower_32_bits(ring->mqd_gpu_addr
3340 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3341 	mqd->dynamic_cu_mask_addr_hi =
3342 		upper_32_bits(ring->mqd_gpu_addr
3343 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3344 
3345 	eop_base_addr = ring->eop_gpu_addr >> 8;
3346 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3347 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3348 
3349 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3350 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3351 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3352 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3353 
3354 	mqd->cp_hqd_eop_control = tmp;
3355 
3356 	/* enable doorbell? */
3357 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3358 
3359 	if (ring->use_doorbell) {
3360 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3361 				    DOORBELL_OFFSET, ring->doorbell_index);
3362 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3363 				    DOORBELL_EN, 1);
3364 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3365 				    DOORBELL_SOURCE, 0);
3366 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3367 				    DOORBELL_HIT, 0);
3368 	} else {
3369 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3370 					 DOORBELL_EN, 0);
3371 	}
3372 
3373 	mqd->cp_hqd_pq_doorbell_control = tmp;
3374 
3375 	/* disable the queue if it's active */
3376 	ring->wptr = 0;
3377 	mqd->cp_hqd_dequeue_request = 0;
3378 	mqd->cp_hqd_pq_rptr = 0;
3379 	mqd->cp_hqd_pq_wptr_lo = 0;
3380 	mqd->cp_hqd_pq_wptr_hi = 0;
3381 
3382 	/* set the pointer to the MQD */
3383 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3384 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3385 
3386 	/* set MQD vmid to 0 */
3387 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3388 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3389 	mqd->cp_mqd_control = tmp;
3390 
3391 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3392 	hqd_gpu_addr = ring->gpu_addr >> 8;
3393 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3394 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3395 
3396 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3397 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3398 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3399 			    (order_base_2(ring->ring_size / 4) - 1));
3400 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3401 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3402 #ifdef __BIG_ENDIAN
3403 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3404 #endif
3405 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3406 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3407 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3408 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3409 	mqd->cp_hqd_pq_control = tmp;
3410 
3411 	/* set the wb address whether it's enabled or not */
3412 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3413 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3414 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3415 		upper_32_bits(wb_gpu_addr) & 0xffff;
3416 
3417 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3418 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3419 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3420 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3421 
3422 	tmp = 0;
3423 	/* enable the doorbell if requested */
3424 	if (ring->use_doorbell) {
3425 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3426 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3427 				DOORBELL_OFFSET, ring->doorbell_index);
3428 
3429 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3430 					 DOORBELL_EN, 1);
3431 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3432 					 DOORBELL_SOURCE, 0);
3433 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3434 					 DOORBELL_HIT, 0);
3435 	}
3436 
3437 	mqd->cp_hqd_pq_doorbell_control = tmp;
3438 
3439 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3440 	ring->wptr = 0;
3441 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3442 
3443 	/* set the vmid for the queue */
3444 	mqd->cp_hqd_vmid = 0;
3445 
3446 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3447 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3448 	mqd->cp_hqd_persistent_state = tmp;
3449 
3450 	/* set MIN_IB_AVAIL_SIZE */
3451 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3452 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3453 	mqd->cp_hqd_ib_control = tmp;
3454 
3455 	/* the map_queues packet doesn't need to activate the queue,
3456 	 * so only the KIQ needs to set this field.
3457 	 */
3458 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3459 		mqd->cp_hqd_active = 1;
3460 
3461 	return 0;
3462 }
3463 
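/*
 * Program the KIQ HQD registers from the values prepared in the MQD,
 * dequeuing the queue first if it is still active.
 */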
3464 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3465 {
3466 	struct amdgpu_device *adev = ring->adev;
3467 	struct v9_mqd *mqd = ring->mqd_ptr;
3468 	int j;
3469 
3470 	/* disable wptr polling */
3471 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3472 
3473 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3474 	       mqd->cp_hqd_eop_base_addr_lo);
3475 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3476 	       mqd->cp_hqd_eop_base_addr_hi);
3477 
3478 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3479 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3480 	       mqd->cp_hqd_eop_control);
3481 
3482 	/* enable doorbell? */
3483 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3484 	       mqd->cp_hqd_pq_doorbell_control);
3485 
3486 	/* disable the queue if it's active */
3487 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3488 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3489 		for (j = 0; j < adev->usec_timeout; j++) {
3490 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3491 				break;
3492 			udelay(1);
3493 		}
3494 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3495 		       mqd->cp_hqd_dequeue_request);
3496 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3497 		       mqd->cp_hqd_pq_rptr);
3498 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3499 		       mqd->cp_hqd_pq_wptr_lo);
3500 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3501 		       mqd->cp_hqd_pq_wptr_hi);
3502 	}
3503 
3504 	/* set the pointer to the MQD */
3505 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3506 	       mqd->cp_mqd_base_addr_lo);
3507 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3508 	       mqd->cp_mqd_base_addr_hi);
3509 
3510 	/* set MQD vmid to 0 */
3511 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3512 	       mqd->cp_mqd_control);
3513 
3514 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3515 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3516 	       mqd->cp_hqd_pq_base_lo);
3517 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3518 	       mqd->cp_hqd_pq_base_hi);
3519 
3520 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3521 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3522 	       mqd->cp_hqd_pq_control);
3523 
3524 	/* set the wb address whether it's enabled or not */
3525 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3526 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3527 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3528 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3529 
3530 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3531 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3532 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3533 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3534 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3535 
3536 	/* enable the doorbell if requested */
3537 	if (ring->use_doorbell) {
3538 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3539 					(adev->doorbell_index.kiq * 2) << 2);
3540 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3541 					(adev->doorbell_index.userqueue_end * 2) << 2);
3542 	}
3543 
3544 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3545 	       mqd->cp_hqd_pq_doorbell_control);
3546 
3547 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3548 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3549 	       mqd->cp_hqd_pq_wptr_lo);
3550 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3551 	       mqd->cp_hqd_pq_wptr_hi);
3552 
3553 	/* set the vmid for the queue */
3554 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3555 
3556 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3557 	       mqd->cp_hqd_persistent_state);
3558 
3559 	/* activate the queue */
3560 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3561 	       mqd->cp_hqd_active);
3562 
3563 	if (ring->use_doorbell)
3564 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3565 
3566 	return 0;
3567 }
3568 
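/*
 * Tear down the KIQ HQD: request a dequeue, wait for the queue to go
 * inactive (force it off on timeout) and clear the HQD registers.
 */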
3569 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3570 {
3571 	struct amdgpu_device *adev = ring->adev;
3572 	int j;
3573 
3574 	/* disable the queue if it's active */
3575 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3576 
3577 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3578 
3579 		for (j = 0; j < adev->usec_timeout; j++) {
3580 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3581 				break;
3582 			udelay(1);
3583 		}
3584 
3585 		if (j == adev->usec_timeout) {
3586 			DRM_DEBUG("KIQ dequeue request failed.\n");
3587 
3588 			/* Manual disable if dequeue request times out */
3589 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3590 		}
3591 
3592 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3593 		      0);
3594 	}
3595 
3596 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3597 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3598 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3599 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3600 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3601 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3602 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3603 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3604 
3605 	return 0;
3606 }
3607 
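/*
 * Initialize the KIQ queue: after a GPU reset, restore the MQD from the
 * backup copy; otherwise build a fresh MQD, program the HQD registers and
 * save the MQD for later restore.
 */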
3608 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3609 {
3610 	struct amdgpu_device *adev = ring->adev;
3611 	struct v9_mqd *mqd = ring->mqd_ptr;
3612 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3613 
3614 	gfx_v9_0_kiq_setting(ring);
3615 
3616 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3617 		/* reset MQD to a clean status */
3618 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3619 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3620 
3621 		/* reset ring buffer */
3622 		ring->wptr = 0;
3623 		amdgpu_ring_clear_ring(ring);
3624 
3625 		mutex_lock(&adev->srbm_mutex);
3626 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3627 		gfx_v9_0_kiq_init_register(ring);
3628 		soc15_grbm_select(adev, 0, 0, 0, 0);
3629 		mutex_unlock(&adev->srbm_mutex);
3630 	} else {
3631 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3632 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3633 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3634 		mutex_lock(&adev->srbm_mutex);
3635 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3636 		gfx_v9_0_mqd_init(ring);
3637 		gfx_v9_0_kiq_init_register(ring);
3638 		soc15_grbm_select(adev, 0, 0, 0, 0);
3639 		mutex_unlock(&adev->srbm_mutex);
3640 
3641 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3642 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3643 	}
3644 
3645 	return 0;
3646 }
3647 
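/*
 * Initialize the MQD for a compute ring on first bring-up, restore it from
 * the backup copy after a GPU reset, or simply clear the ring on resume.
 */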
3648 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3649 {
3650 	struct amdgpu_device *adev = ring->adev;
3651 	struct v9_mqd *mqd = ring->mqd_ptr;
3652 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3653 
3654 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3655 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3656 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3657 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3658 		mutex_lock(&adev->srbm_mutex);
3659 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3660 		gfx_v9_0_mqd_init(ring);
3661 		soc15_grbm_select(adev, 0, 0, 0, 0);
3662 		mutex_unlock(&adev->srbm_mutex);
3663 
3664 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3665 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3666 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3667 		/* reset MQD to a clean status */
3668 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3669 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3670 
3671 		/* reset ring buffer */
3672 		ring->wptr = 0;
3673 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3674 		amdgpu_ring_clear_ring(ring);
3675 	} else {
3676 		amdgpu_ring_clear_ring(ring);
3677 	}
3678 
3679 	return 0;
3680 }
3681 
3682 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3683 {
3684 	struct amdgpu_ring *ring;
3685 	int r;
3686 
3687 	ring = &adev->gfx.kiq.ring;
3688 
3689 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3690 	if (unlikely(r != 0))
3691 		return r;
3692 
3693 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3694 	if (unlikely(r != 0))
3695 		return r;
3696 
3697 	gfx_v9_0_kiq_init_queue(ring);
3698 	amdgpu_bo_kunmap(ring->mqd_obj);
3699 	ring->mqd_ptr = NULL;
3700 	amdgpu_bo_unreserve(ring->mqd_obj);
3701 	ring->sched.ready = true;
3702 	return 0;
3703 }
3704 
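/*
 * Enable the compute CP, initialize the MQD of every compute ring and ask
 * the KIQ to map the compute queues via amdgpu_gfx_enable_kcq().
 */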
3705 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3706 {
3707 	struct amdgpu_ring *ring = NULL;
3708 	int r = 0, i;
3709 
3710 	gfx_v9_0_cp_compute_enable(adev, true);
3711 
3712 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3713 		ring = &adev->gfx.compute_ring[i];
3714 
3715 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3716 		if (unlikely(r != 0))
3717 			goto done;
3718 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3719 		if (!r) {
3720 			r = gfx_v9_0_kcq_init_queue(ring);
3721 			amdgpu_bo_kunmap(ring->mqd_obj);
3722 			ring->mqd_ptr = NULL;
3723 		}
3724 		amdgpu_bo_unreserve(ring->mqd_obj);
3725 		if (r)
3726 			goto done;
3727 	}
3728 
3729 	r = amdgpu_gfx_enable_kcq(adev);
3730 done:
3731 	return r;
3732 }
3733 
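/*
 * Bring up the whole CP: load the gfx/compute microcode when PSP loading
 * is not used, resume the KIQ, the gfx ring (not present on Arcturus) and
 * the compute queues, then ring-test them.
 */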
3734 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3735 {
3736 	int r, i;
3737 	struct amdgpu_ring *ring;
3738 
3739 	if (!(adev->flags & AMD_IS_APU))
3740 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3741 
3742 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3743 		if (adev->asic_type != CHIP_ARCTURUS) {
3744 			/* legacy firmware loading */
3745 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3746 			if (r)
3747 				return r;
3748 		}
3749 
3750 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3751 		if (r)
3752 			return r;
3753 	}
3754 
3755 	r = gfx_v9_0_kiq_resume(adev);
3756 	if (r)
3757 		return r;
3758 
3759 	if (adev->asic_type != CHIP_ARCTURUS) {
3760 		r = gfx_v9_0_cp_gfx_resume(adev);
3761 		if (r)
3762 			return r;
3763 	}
3764 
3765 	r = gfx_v9_0_kcq_resume(adev);
3766 	if (r)
3767 		return r;
3768 
3769 	if (adev->asic_type != CHIP_ARCTURUS) {
3770 		ring = &adev->gfx.gfx_ring[0];
3771 		r = amdgpu_ring_test_helper(ring);
3772 		if (r)
3773 			return r;
3774 	}
3775 
3776 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3777 		ring = &adev->gfx.compute_ring[i];
3778 		amdgpu_ring_test_helper(ring);
3779 	}
3780 
3781 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3782 
3783 	return 0;
3784 }
3785 
3786 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3787 {
3788 	u32 tmp;
3789 
3790 	if (adev->asic_type != CHIP_ARCTURUS)
3791 		return;
3792 
3793 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3794 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3795 				adev->df.hash_status.hash_64k);
3796 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3797 				adev->df.hash_status.hash_2m);
3798 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3799 				adev->df.hash_status.hash_1g);
3800 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3801 }
3802 
3803 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3804 {
3805 	if (adev->asic_type != CHIP_ARCTURUS)
3806 		gfx_v9_0_cp_gfx_enable(adev, enable);
3807 	gfx_v9_0_cp_compute_enable(adev, enable);
3808 }
3809 
3810 static int gfx_v9_0_hw_init(void *handle)
3811 {
3812 	int r;
3813 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3814 
3815 	if (!amdgpu_sriov_vf(adev))
3816 		gfx_v9_0_init_golden_registers(adev);
3817 
3818 	gfx_v9_0_constants_init(adev);
3819 
3820 	gfx_v9_0_init_tcp_config(adev);
3821 
3822 	r = adev->gfx.rlc.funcs->resume(adev);
3823 	if (r)
3824 		return r;
3825 
3826 	r = gfx_v9_0_cp_resume(adev);
3827 	if (r)
3828 		return r;
3829 
3830 	return r;
3831 }
3832 
3833 static int gfx_v9_0_hw_fini(void *handle)
3834 {
3835 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3836 
3837 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3838 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3839 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3840 
3841 	/* DF freeze and KCQ disable will fail if a RAS interrupt has been triggered */
3842 	if (!amdgpu_ras_intr_triggered())
3843 		/* disable the KCQ to avoid the CPC touching memory that is no longer valid */
3844 		amdgpu_gfx_disable_kcq(adev);
3845 
3846 	if (amdgpu_sriov_vf(adev)) {
3847 		gfx_v9_0_cp_gfx_enable(adev, false);
3848 		/* must disable wptr polling for SRIOV when hw teardown is finished,
3849 		 * otherwise the CPC engine may keep fetching a WB address that is
3850 		 * no longer valid after the sw teardown and trigger a DMAR read
3851 		 * error on the hypervisor side.
3852 		 */
3853 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3854 		return 0;
3855 	}
3856 
3857 	/* Use the deinitialize sequence from CAIL when unbinding the device
3858 	 * from the driver, otherwise the KIQ hangs when binding it back.
3859 	 */
3860 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3861 		mutex_lock(&adev->srbm_mutex);
3862 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3863 				adev->gfx.kiq.ring.pipe,
3864 				adev->gfx.kiq.ring.queue, 0);
3865 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3866 		soc15_grbm_select(adev, 0, 0, 0, 0);
3867 		mutex_unlock(&adev->srbm_mutex);
3868 	}
3869 
3870 	gfx_v9_0_cp_enable(adev, false);
3871 	adev->gfx.rlc.funcs->stop(adev);
3872 
3873 	return 0;
3874 }
3875 
3876 static int gfx_v9_0_suspend(void *handle)
3877 {
3878 	return gfx_v9_0_hw_fini(handle);
3879 }
3880 
3881 static int gfx_v9_0_resume(void *handle)
3882 {
3883 	return gfx_v9_0_hw_init(handle);
3884 }
3885 
3886 static bool gfx_v9_0_is_idle(void *handle)
3887 {
3888 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3889 
3890 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3891 				GRBM_STATUS, GUI_ACTIVE))
3892 		return false;
3893 	else
3894 		return true;
3895 }
3896 
3897 static int gfx_v9_0_wait_for_idle(void *handle)
3898 {
3899 	unsigned i;
3900 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3901 
3902 	for (i = 0; i < adev->usec_timeout; i++) {
3903 		if (gfx_v9_0_is_idle(handle))
3904 			return 0;
3905 		udelay(1);
3906 	}
3907 	return -ETIMEDOUT;
3908 }
3909 
3910 static int gfx_v9_0_soft_reset(void *handle)
3911 {
3912 	u32 grbm_soft_reset = 0;
3913 	u32 tmp;
3914 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3915 
3916 	/* GRBM_STATUS */
3917 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3918 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3919 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3920 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3921 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3922 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3923 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3924 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3925 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3926 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3927 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3928 	}
3929 
3930 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3931 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3932 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3933 	}
3934 
3935 	/* GRBM_STATUS2 */
3936 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3937 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3938 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3939 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3940 
3941 
3942 	if (grbm_soft_reset) {
3943 		/* stop the rlc */
3944 		adev->gfx.rlc.funcs->stop(adev);
3945 
3946 		if (adev->asic_type != CHIP_ARCTURUS)
3947 			/* Disable GFX parsing/prefetching */
3948 			gfx_v9_0_cp_gfx_enable(adev, false);
3949 
3950 		/* Disable MEC parsing/prefetching */
3951 		gfx_v9_0_cp_compute_enable(adev, false);
3952 
3953 		if (grbm_soft_reset) {
3954 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3955 			tmp |= grbm_soft_reset;
3956 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3957 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3958 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3959 
3960 			udelay(50);
3961 
3962 			tmp &= ~grbm_soft_reset;
3963 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3964 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3965 		}
3966 
3967 		/* Wait a little for things to settle down */
3968 		udelay(50);
3969 	}
3970 	return 0;
3971 }
3972 
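/*
 * Read the 64-bit GPU clock counter through the KIQ ring: emit a COPY_DATA
 * packet that lands the value in a write-back slot, then poll the fence
 * with retries (bailing out early during GPU reset or in IRQ context).
 */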
3973 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3974 {
3975 	signed long r, cnt = 0;
3976 	unsigned long flags;
3977 	uint32_t seq;
3978 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3979 	struct amdgpu_ring *ring = &kiq->ring;
3980 
3981 	BUG_ON(!ring->funcs->emit_rreg);
3982 
3983 	spin_lock_irqsave(&kiq->ring_lock, flags);
3984 	amdgpu_ring_alloc(ring, 32);
3985 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3986 	amdgpu_ring_write(ring, 9 |	/* src: register*/
3987 				(5 << 8) |	/* dst: memory */
3988 				(1 << 16) |	/* count sel */
3989 				(1 << 20));	/* write confirm */
3990 	amdgpu_ring_write(ring, 0);
3991 	amdgpu_ring_write(ring, 0);
3992 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3993 				kiq->reg_val_offs * 4));
3994 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3995 				kiq->reg_val_offs * 4));
3996 	amdgpu_fence_emit_polling(ring, &seq);
3997 	amdgpu_ring_commit(ring);
3998 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3999 
4000 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4001 
4002 	/* Don't wait any longer in the GPU reset case, because waiting here may
4003 	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg path
4004 	 * is triggered from TTM and ttm_bo_lock_delayed_workqueue() will
4005 	 * never return if we keep waiting in virt_kiq_rreg, which makes
4006 	 * gpu_recover() hang there.
4007 	 *
4008 	 * Also don't wait any longer when called from IRQ context.
4009 	 */
4010 	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4011 		goto failed_kiq_read;
4012 
4013 	might_sleep();
4014 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4015 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4016 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4017 	}
4018 
4019 	if (cnt > MAX_KIQ_REG_TRY)
4020 		goto failed_kiq_read;
4021 
4022 	return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
4023 		(uint64_t)adev->wb.wb[kiq->reg_val_offs + 1] << 32ULL;
4024 
4025 failed_kiq_read:
4026 	pr_err("failed to read gpu clock\n");
4027 	return ~0;
4028 }
4029 
4030 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4031 {
4032 	uint64_t clock;
4033 
4034 	amdgpu_gfx_off_ctrl(adev, false);
4035 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4036 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4037 		clock = gfx_v9_0_kiq_read_clock(adev);
4038 	} else {
4039 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4040 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4041 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4042 	}
4043 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4044 	amdgpu_gfx_off_ctrl(adev, true);
4045 	return clock;
4046 }
4047 
4048 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4049 					  uint32_t vmid,
4050 					  uint32_t gds_base, uint32_t gds_size,
4051 					  uint32_t gws_base, uint32_t gws_size,
4052 					  uint32_t oa_base, uint32_t oa_size)
4053 {
4054 	struct amdgpu_device *adev = ring->adev;
4055 
4056 	/* GDS Base */
4057 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4058 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4059 				   gds_base);
4060 
4061 	/* GDS Size */
4062 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4063 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4064 				   gds_size);
4065 
4066 	/* GWS */
4067 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4068 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4069 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4070 
4071 	/* OA */
4072 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4073 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4074 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4075 }
4076 
4077 static const u32 vgpr_init_compute_shader[] =
4078 {
4079 	0xb07c0000, 0xbe8000ff,
4080 	0x000000f8, 0xbf110800,
4081 	0x7e000280, 0x7e020280,
4082 	0x7e040280, 0x7e060280,
4083 	0x7e080280, 0x7e0a0280,
4084 	0x7e0c0280, 0x7e0e0280,
4085 	0x80808800, 0xbe803200,
4086 	0xbf84fff5, 0xbf9c0000,
4087 	0xd28c0001, 0x0001007f,
4088 	0xd28d0001, 0x0002027e,
4089 	0x10020288, 0xb8810904,
4090 	0xb7814000, 0xd1196a01,
4091 	0x00000301, 0xbe800087,
4092 	0xbefc00c1, 0xd89c4000,
4093 	0x00020201, 0xd89cc080,
4094 	0x00040401, 0x320202ff,
4095 	0x00000800, 0x80808100,
4096 	0xbf84fff8, 0x7e020280,
4097 	0xbf810000, 0x00000000,
4098 };
4099 
4100 static const u32 sgpr_init_compute_shader[] =
4101 {
4102 	0xb07c0000, 0xbe8000ff,
4103 	0x0000005f, 0xbee50080,
4104 	0xbe812c65, 0xbe822c65,
4105 	0xbe832c65, 0xbe842c65,
4106 	0xbe852c65, 0xb77c0005,
4107 	0x80808500, 0xbf84fff8,
4108 	0xbe800080, 0xbf810000,
4109 };
4110 
4111 /* When the register arrays below are changed, please update gpr_reg_size
4112    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4113    to cover all gfx9 ASICs. */
4114 static const struct soc15_reg_entry vgpr_init_regs[] = {
4115    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4116    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4117    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4118    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4119    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4120    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4121    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4122    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4123    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4124    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4125    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4126    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4127    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4128    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4129 };
4130 
4131 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4132    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4133    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4134    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4135    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4136    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4137    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4138    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4139    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4140    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4141    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4146 };
4147 
4148 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4151    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4152    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4153    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4163 };
4164 
4165 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4166    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4167    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4168    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4169    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4173    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4174    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4175    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4176    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4177    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4178    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4179    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4180    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4181    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4183    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4185    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4186    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4187    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4188    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4189    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4194    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4195    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4196    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4198    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4199 };
4200 
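/*
 * EDC/RAS workaround: initialize the whole GDS space with a DMA_DATA
 * packet on the first compute ring and wait for the ring to drain.
 */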
4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4202 {
4203 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4204 	int i, r;
4205 
4206 	/* only support when RAS is enabled */
4207 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4208 		return 0;
4209 
4210 	r = amdgpu_ring_alloc(ring, 7);
4211 	if (r) {
4212 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4213 			ring->name, r);
4214 		return r;
4215 	}
4216 
4217 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4218 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4219 
4220 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4221 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4222 				PACKET3_DMA_DATA_DST_SEL(1) |
4223 				PACKET3_DMA_DATA_SRC_SEL(2) |
4224 				PACKET3_DMA_DATA_ENGINE(0)));
4225 	amdgpu_ring_write(ring, 0);
4226 	amdgpu_ring_write(ring, 0);
4227 	amdgpu_ring_write(ring, 0);
4228 	amdgpu_ring_write(ring, 0);
4229 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4230 				adev->gds.gds_size);
4231 
4232 	amdgpu_ring_commit(ring);
4233 
4234 	for (i = 0; i < adev->usec_timeout; i++) {
4235 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4236 			break;
4237 		udelay(1);
4238 	}
4239 
4240 	if (i >= adev->usec_timeout)
4241 		r = -ETIMEDOUT;
4242 
4243 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4244 
4245 	return r;
4246 }
4247 
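/*
 * EDC/RAS workaround: build an IB that dispatches the VGPR and SGPR init
 * compute shaders above across all SEs/CUs so every GPR bank gets written
 * once, then wait for the resulting fence.
 */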
4248 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4249 {
4250 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4251 	struct amdgpu_ib ib;
4252 	struct dma_fence *f = NULL;
4253 	int r, i;
4254 	unsigned total_size, vgpr_offset, sgpr_offset;
4255 	u64 gpu_addr;
4256 
4257 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4258 						adev->gfx.config.max_cu_per_sh *
4259 						adev->gfx.config.max_sh_per_se;
4260 	int sgpr_work_group_size = 5;
4261 	int gpr_reg_size = compute_dim_x / 16 + 6;
4262 
4263 	/* only support when RAS is enabled */
4264 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4265 		return 0;
4266 
4267 	/* bail if the compute ring is not ready */
4268 	if (!ring->sched.ready)
4269 		return 0;
4270 
4271 	total_size =
4272 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4273 	total_size +=
4274 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4275 	total_size +=
4276 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4277 	total_size = ALIGN(total_size, 256);
4278 	vgpr_offset = total_size;
4279 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4280 	sgpr_offset = total_size;
4281 	total_size += sizeof(sgpr_init_compute_shader);
4282 
4283 	/* allocate an indirect buffer to put the commands in */
4284 	memset(&ib, 0, sizeof(ib));
4285 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4286 	if (r) {
4287 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4288 		return r;
4289 	}
4290 
4291 	/* load the compute shaders */
4292 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4293 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4294 
4295 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4296 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4297 
4298 	/* init the ib length to 0 */
4299 	ib.length_dw = 0;
4300 
4301 	/* VGPR */
4302 	/* write the register state for the compute dispatch */
4303 	for (i = 0; i < gpr_reg_size; i++) {
4304 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4305 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4306 								- PACKET3_SET_SH_REG_START;
4307 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4308 	}
4309 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4310 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4311 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4312 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4313 							- PACKET3_SET_SH_REG_START;
4314 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4315 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4316 
4317 	/* write dispatch packet */
4318 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4319 	ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4320 	ib.ptr[ib.length_dw++] = 1; /* y */
4321 	ib.ptr[ib.length_dw++] = 1; /* z */
4322 	ib.ptr[ib.length_dw++] =
4323 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4324 
4325 	/* write CS partial flush packet */
4326 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4327 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4328 
4329 	/* SGPR1 */
4330 	/* write the register state for the compute dispatch */
4331 	for (i = 0; i < gpr_reg_size; i++) {
4332 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4333 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4334 								- PACKET3_SET_SH_REG_START;
4335 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4336 	}
4337 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4338 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4339 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4340 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4341 							- PACKET3_SET_SH_REG_START;
4342 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4343 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4344 
4345 	/* write dispatch packet */
4346 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4347 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4348 	ib.ptr[ib.length_dw++] = 1; /* y */
4349 	ib.ptr[ib.length_dw++] = 1; /* z */
4350 	ib.ptr[ib.length_dw++] =
4351 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4352 
4353 	/* write CS partial flush packet */
4354 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4355 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4356 
4357 	/* SGPR2 */
4358 	/* write the register state for the compute dispatch */
4359 	for (i = 0; i < gpr_reg_size; i++) {
4360 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4361 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4362 								- PACKET3_SET_SH_REG_START;
4363 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4364 	}
4365 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4366 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4367 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4368 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4369 							- PACKET3_SET_SH_REG_START;
4370 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4371 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4372 
4373 	/* write dispatch packet */
4374 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4375 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4376 	ib.ptr[ib.length_dw++] = 1; /* y */
4377 	ib.ptr[ib.length_dw++] = 1; /* z */
4378 	ib.ptr[ib.length_dw++] =
4379 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4380 
4381 	/* write CS partial flush packet */
4382 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4383 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4384 
4385 	/* schedule the ib on the ring */
4386 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4387 	if (r) {
4388 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4389 		goto fail;
4390 	}
4391 
4392 	/* wait for the GPU to finish processing the IB */
4393 	r = dma_fence_wait(f, false);
4394 	if (r) {
4395 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4396 		goto fail;
4397 	}
4398 
4399 fail:
4400 	amdgpu_ib_free(adev, &ib, NULL);
4401 	dma_fence_put(f);
4402 
4403 	return r;
4404 }
4405 
4406 static int gfx_v9_0_early_init(void *handle)
4407 {
4408 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4409 
4410 	if (adev->asic_type == CHIP_ARCTURUS)
4411 		adev->gfx.num_gfx_rings = 0;
4412 	else
4413 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4414 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4415 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4416 	gfx_v9_0_set_ring_funcs(adev);
4417 	gfx_v9_0_set_irq_funcs(adev);
4418 	gfx_v9_0_set_gds_init(adev);
4419 	gfx_v9_0_set_rlc_funcs(adev);
4420 
4421 	return 0;
4422 }
4423 
4424 static int gfx_v9_0_ecc_late_init(void *handle)
4425 {
4426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4427 	int r;
4428 
4429 	/*
4430 	 * Temporary workaround: on several cards the CP firmware fails to
4431 	 * update the read pointer while CPDMA writes the GDS clearing
4432 	 * operation during the suspend/resume sequence, so limit this
4433 	 * operation to the cold boot sequence.
4434 	 */
4435 	if (!adev->in_suspend) {
4436 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4437 		if (r)
4438 			return r;
4439 	}
4440 
4441 	/* requires IBs so do in late init after IB pool is initialized */
4442 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4443 	if (r)
4444 		return r;
4445 
4446 	if (adev->gfx.funcs &&
4447 	    adev->gfx.funcs->reset_ras_error_count)
4448 		adev->gfx.funcs->reset_ras_error_count(adev);
4449 
4450 	r = amdgpu_gfx_ras_late_init(adev);
4451 	if (r)
4452 		return r;
4453 
4454 	return 0;
4455 }
4456 
4457 static int gfx_v9_0_late_init(void *handle)
4458 {
4459 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4460 	int r;
4461 
4462 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4463 	if (r)
4464 		return r;
4465 
4466 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4467 	if (r)
4468 		return r;
4469 
4470 	r = gfx_v9_0_ecc_late_init(handle);
4471 	if (r)
4472 		return r;
4473 
4474 	return 0;
4475 }
4476 
4477 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4478 {
4479 	uint32_t rlc_setting;
4480 
4481 	/* if RLC is not enabled, do nothing */
4482 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4483 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4484 		return false;
4485 
4486 	return true;
4487 }
4488 
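/* Request RLC safe mode: write the CMD bit together with the enter message
 * and poll until the RLC acknowledges by clearing the CMD field.
 */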
4489 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4490 {
4491 	uint32_t data;
4492 	unsigned i;
4493 
4494 	data = RLC_SAFE_MODE__CMD_MASK;
4495 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4496 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4497 
4498 	/* wait for RLC_SAFE_MODE */
4499 	for (i = 0; i < adev->usec_timeout; i++) {
4500 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4501 			break;
4502 		udelay(1);
4503 	}
4504 }
4505 
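/* Leave RLC safe mode: only the CMD bit is written (message 0); no
 * acknowledgement is polled here.
 */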
4506 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4507 {
4508 	uint32_t data;
4509 
4510 	data = RLC_SAFE_MODE__CMD_MASK;
4511 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4512 }
4513 
4514 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4515 						bool enable)
4516 {
4517 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4518 
4519 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4520 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4521 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4522 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4523 	} else {
4524 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4525 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4526 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4527 	}
4528 
4529 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4530 }
4531 
4532 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4533 						bool enable)
4534 {
4535 	/* TODO: double check if we need to perform under safe mode */
4536 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4537 
4538 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4539 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4540 	else
4541 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4542 
4543 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4544 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4545 	else
4546 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4547 
4548 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4549 }
4550 
4551 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4552 						      bool enable)
4553 {
4554 	uint32_t data, def;
4555 
4556 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4557 
4558 	/* It is disabled by HW by default */
4559 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4560 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4561 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4562 
4563 		if (adev->asic_type != CHIP_VEGA12)
4564 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4565 
4566 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4567 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4568 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4569 
4570 		/* only for Vega10 & Raven1 */
4571 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4572 
4573 		if (def != data)
4574 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4575 
4576 		/* MGLS is a global flag to control all MGLS in GFX */
4577 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4578 			/* 2 - RLC memory Light sleep */
4579 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4580 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4581 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4582 				if (def != data)
4583 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4584 			}
4585 			/* 3 - CP memory Light sleep */
4586 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4587 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4588 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4589 				if (def != data)
4590 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4591 			}
4592 		}
4593 	} else {
4594 		/* 1 - MGCG_OVERRIDE */
4595 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4596 
4597 		if (adev->asic_type != CHIP_VEGA12)
4598 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4599 
4600 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4601 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4602 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4603 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4604 
4605 		if (def != data)
4606 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4607 
4608 		/* 2 - disable MGLS in RLC */
4609 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4610 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4611 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4612 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4613 		}
4614 
4615 		/* 3 - disable MGLS in CP */
4616 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4617 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4618 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4619 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4620 		}
4621 	}
4622 
4623 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4624 }
4625 
4626 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4627 					   bool enable)
4628 {
4629 	uint32_t data, def;
4630 
4631 	if (adev->asic_type == CHIP_ARCTURUS)
4632 		return;
4633 
4634 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4635 
4636 	/* Enable 3D CGCG/CGLS */
4637 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4638 		/* write cmd to clear cgcg/cgls ov */
4639 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4640 		/* unset CGCG override */
4641 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4642 		/* update CGCG and CGLS override bits */
4643 		if (def != data)
4644 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4645 
4646 		/* enable 3Dcgcg FSM(0x0000363f) */
4647 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4648 
4649 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4650 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4651 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4652 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4653 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4654 		if (def != data)
4655 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4656 
4657 		/* set IDLE_POLL_COUNT(0x00900100) */
4658 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4659 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4660 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4661 		if (def != data)
4662 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4663 	} else {
4664 		/* Disable CGCG/CGLS */
4665 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4666 		/* disable cgcg, cgls should be disabled */
4667 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4668 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4669 		/* disable cgcg and cgls in FSM */
4670 		if (def != data)
4671 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4672 	}
4673 
4674 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4675 }
4676 
4677 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4678 						      bool enable)
4679 {
4680 	uint32_t def, data;
4681 
4682 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4683 
4684 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4685 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4686 		/* unset CGCG override */
4687 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4688 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4689 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4690 		else
4691 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4692 		/* update CGCG and CGLS override bits */
4693 		if (def != data)
4694 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4695 
4696 		/* enable cgcg FSM(0x0000363F) */
4697 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4698 
4699 		if (adev->asic_type == CHIP_ARCTURUS)
4700 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4701 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4702 		else
4703 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4704 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4705 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4706 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4707 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4708 		if (def != data)
4709 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4710 
4711 		/* set IDLE_POLL_COUNT(0x00900100) */
4712 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4713 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4714 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4715 		if (def != data)
4716 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4717 	} else {
4718 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4719 		/* reset CGCG/CGLS bits */
4720 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4721 		/* disable cgcg and cgls in FSM */
4722 		if (def != data)
4723 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4724 	}
4725 
4726 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4727 }
4728 
4729 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4730 					    bool enable)
4731 {
4732 	if (enable) {
4733 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4734 		 * ===  MGCG + MGLS ===
4735 		 */
4736 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4737 		/* ===  CGCG /CGLS for GFX 3D Only === */
4738 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4739 		/* ===  CGCG + CGLS === */
4740 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4741 	} else {
4742 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4743 		 * ===  CGCG + CGLS ===
4744 		 */
4745 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4746 		/* ===  CGCG /CGLS for GFX 3D Only === */
4747 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4748 		/* ===  MGCG + MGLS === */
4749 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4750 	}
4751 	return 0;
4752 }
4753 
4754 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
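/* Select the VMID whose memory accesses the RLC streaming performance
 * monitor (SPM) should use.
 */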
4755 {
4756 	u32 data;
4757 
4758 	data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4759 
4760 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4761 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4762 
4763 	WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4764 }
4765 
4766 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4767 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4768 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4769 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4770 	.init = gfx_v9_0_rlc_init,
4771 	.get_csb_size = gfx_v9_0_get_csb_size,
4772 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4773 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4774 	.resume = gfx_v9_0_rlc_resume,
4775 	.stop = gfx_v9_0_rlc_stop,
4776 	.reset = gfx_v9_0_rlc_reset,
4777 	.start = gfx_v9_0_rlc_start,
4778 	.update_spm_vmid = gfx_v9_0_update_spm_vmid
4779 };
4780 
4781 static int gfx_v9_0_set_powergating_state(void *handle,
4782 					  enum amd_powergating_state state)
4783 {
4784 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4785 	bool enable = (state == AMD_PG_STATE_GATE);
4786 
4787 	switch (adev->asic_type) {
4788 	case CHIP_RAVEN:
4789 	case CHIP_RENOIR:
4790 		if (!enable) {
4791 			amdgpu_gfx_off_ctrl(adev, false);
4792 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4793 		}
4794 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4795 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4796 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4797 		} else {
4798 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4799 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4800 		}
4801 
4802 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4803 			gfx_v9_0_enable_cp_power_gating(adev, true);
4804 		else
4805 			gfx_v9_0_enable_cp_power_gating(adev, false);
4806 
4807 		/* update gfx cgpg state */
4808 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4809 
4810 		/* update mgcg state */
4811 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4812 
4813 		if (enable)
4814 			amdgpu_gfx_off_ctrl(adev, true);
4815 		break;
4816 	case CHIP_VEGA12:
4817 		if (!enable) {
4818 			amdgpu_gfx_off_ctrl(adev, false);
4819 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4820 		} else {
4821 			amdgpu_gfx_off_ctrl(adev, true);
4822 		}
4823 		break;
4824 	default:
4825 		break;
4826 	}
4827 
4828 	return 0;
4829 }
4830 
4831 static int gfx_v9_0_set_clockgating_state(void *handle,
4832 					  enum amd_clockgating_state state)
4833 {
4834 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4835 
4836 	if (amdgpu_sriov_vf(adev))
4837 		return 0;
4838 
4839 	switch (adev->asic_type) {
4840 	case CHIP_VEGA10:
4841 	case CHIP_VEGA12:
4842 	case CHIP_VEGA20:
4843 	case CHIP_RAVEN:
4844 	case CHIP_ARCTURUS:
4845 	case CHIP_RENOIR:
4846 		gfx_v9_0_update_gfx_clock_gating(adev,
4847 						 state == AMD_CG_STATE_GATE);
4848 		break;
4849 	default:
4850 		break;
4851 	}
4852 	return 0;
4853 }
4854 
4855 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4856 {
4857 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4858 	int data;
4859 
4860 	if (amdgpu_sriov_vf(adev))
4861 		*flags = 0;
4862 
4863 	/* AMD_CG_SUPPORT_GFX_MGCG */
4864 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4865 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4866 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4867 
4868 	/* AMD_CG_SUPPORT_GFX_CGCG */
4869 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4870 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4871 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4872 
4873 	/* AMD_CG_SUPPORT_GFX_CGLS */
4874 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4875 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4876 
4877 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4878 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4879 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4880 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4881 
4882 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4883 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4884 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4885 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4886 
4887 	if (adev->asic_type != CHIP_ARCTURUS) {
4888 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4889 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4890 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4891 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4892 
4893 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4894 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4895 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4896 	}
4897 }
4898 
4899 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4900 {
4901 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4902 }
4903 
4904 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4905 {
4906 	struct amdgpu_device *adev = ring->adev;
4907 	u64 wptr;
4908 
4909 	/* XXX check if swapping is necessary on BE */
4910 	if (ring->use_doorbell) {
4911 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4912 	} else {
4913 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4914 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4915 	}
4916 
4917 	return wptr;
4918 }
4919 
4920 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4921 {
4922 	struct amdgpu_device *adev = ring->adev;
4923 
4924 	if (ring->use_doorbell) {
4925 		/* XXX check if swapping is necessary on BE */
4926 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4927 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4928 	} else {
4929 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4930 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4931 	}
4932 }
4933 
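/* Flush HDP by kicking the NBIO HDP-flush request register and waiting for
 * the matching done bit; the ref/mask is chosen from the ring's ME and pipe.
 */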
4934 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4935 {
4936 	struct amdgpu_device *adev = ring->adev;
4937 	u32 ref_and_mask, reg_mem_engine;
4938 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4939 
4940 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4941 		switch (ring->me) {
4942 		case 1:
4943 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4944 			break;
4945 		case 2:
4946 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4947 			break;
4948 		default:
4949 			return;
4950 		}
4951 		reg_mem_engine = 0;
4952 	} else {
4953 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4954 		reg_mem_engine = 1; /* pfp */
4955 	}
4956 
4957 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4958 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4959 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4960 			      ref_and_mask, ref_and_mask, 0x20);
4961 }
4962 
4963 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4964 					struct amdgpu_job *job,
4965 					struct amdgpu_ib *ib,
4966 					uint32_t flags)
4967 {
4968 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4969 	u32 header, control = 0;
4970 
4971 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4972 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4973 	else
4974 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4975 
4976 	control |= ib->length_dw | (vmid << 24);
4977 
4978 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4979 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4980 
4981 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
4982 			gfx_v9_0_ring_emit_de_meta(ring);
4983 	}
4984 
4985 	amdgpu_ring_write(ring, header);
4986 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4987 	amdgpu_ring_write(ring,
4988 #ifdef __BIG_ENDIAN
4989 		(2 << 0) |
4990 #endif
4991 		lower_32_bits(ib->gpu_addr));
4992 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4993 	amdgpu_ring_write(ring, control);
4994 }
4995 
4996 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4997 					  struct amdgpu_job *job,
4998 					  struct amdgpu_ib *ib,
4999 					  uint32_t flags)
5000 {
5001 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5002 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5003 
5004 	/* Currently, there is a high probability of a wave ID mismatch
5005 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5006 	 * different wave IDs than the GDS expects. This situation happens
5007 	 * randomly when at least 5 compute pipes use GDS ordered append.
5008 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5009 	 * Those are probably bugs somewhere else in the kernel driver.
5010 	 *
5011 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5012 	 * GDS to 0 for this ring (me/pipe).
5013 	 */
5014 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5015 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5016 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5017 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5018 	}
5019 
5020 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5021 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5022 	amdgpu_ring_write(ring,
5023 #ifdef __BIG_ENDIAN
5024 				(2 << 0) |
5025 #endif
5026 				lower_32_bits(ib->gpu_addr));
5027 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5028 	amdgpu_ring_write(ring, control);
5029 }
5030 
5031 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5032 				     u64 seq, unsigned flags)
5033 {
5034 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5035 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5036 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5037 
5038 	/* RELEASE_MEM - flush caches, send int */
5039 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5040 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5041 					       EOP_TC_NC_ACTION_EN) :
5042 					      (EOP_TCL1_ACTION_EN |
5043 					       EOP_TC_ACTION_EN |
5044 					       EOP_TC_WB_ACTION_EN |
5045 					       EOP_TC_MD_ACTION_EN)) |
5046 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5047 				 EVENT_INDEX(5)));
5048 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5049 
5050 	/*
5051 	 * The address should be Qword aligned for a 64bit write and Dword
5052 	 * aligned if only the low 32 bits of data are sent (data high is discarded).
5053 	 */
5054 	if (write64bit)
5055 		BUG_ON(addr & 0x7);
5056 	else
5057 		BUG_ON(addr & 0x3);
5058 	amdgpu_ring_write(ring, lower_32_bits(addr));
5059 	amdgpu_ring_write(ring, upper_32_bits(addr));
5060 	amdgpu_ring_write(ring, lower_32_bits(seq));
5061 	amdgpu_ring_write(ring, upper_32_bits(seq));
5062 	amdgpu_ring_write(ring, 0);
5063 }
5064 
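/* Wait on this ring's fence address until the last emitted fence sequence
 * number has signalled, using the PFP on gfx rings and the ME on compute
 * rings.
 */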
5065 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5066 {
5067 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5068 	uint32_t seq = ring->fence_drv.sync_seq;
5069 	uint64_t addr = ring->fence_drv.gpu_addr;
5070 
5071 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5072 			      lower_32_bits(addr), upper_32_bits(addr),
5073 			      seq, 0xffffffff, 4);
5074 }
5075 
5076 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5077 					unsigned vmid, uint64_t pd_addr)
5078 {
5079 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5080 
5081 	/* compute doesn't have PFP */
5082 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5083 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5084 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5085 		amdgpu_ring_write(ring, 0x0);
5086 	}
5087 }
5088 
5089 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5090 {
5091 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5092 }
5093 
5094 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5095 {
5096 	u64 wptr;
5097 
5098 	/* XXX check if swapping is necessary on BE */
5099 	if (ring->use_doorbell)
5100 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5101 	else
5102 		BUG();
5103 	return wptr;
5104 }
5105 
5106 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5107 					   bool acquire)
5108 {
5109 	struct amdgpu_device *adev = ring->adev;
5110 	int pipe_num, tmp, reg;
5111 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5112 
5113 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5114 
5115 	/* first me only has 2 entries, GFX and HP3D */
5116 	if (ring->me > 0)
5117 		pipe_num -= 2;
5118 
5119 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5120 	tmp = RREG32(reg);
5121 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5122 	WREG32(reg, tmp);
5123 }
5124 
5125 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5126 					    struct amdgpu_ring *ring,
5127 					    bool acquire)
5128 {
5129 	int i, pipe;
5130 	bool reserve;
5131 	struct amdgpu_ring *iring;
5132 
5133 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5134 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5135 	if (acquire)
5136 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5137 	else
5138 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5139 
5140 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5141 		/* Clear all reservations - everyone reacquires all resources */
5142 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5143 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5144 						       true);
5145 
5146 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5147 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5148 						       true);
5149 	} else {
5150 		/* Lower all pipes without a current reservation */
5151 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5152 			iring = &adev->gfx.gfx_ring[i];
5153 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5154 							   iring->me,
5155 							   iring->pipe,
5156 							   0);
5157 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5158 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5159 		}
5160 
5161 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5162 			iring = &adev->gfx.compute_ring[i];
5163 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5164 							   iring->me,
5165 							   iring->pipe,
5166 							   0);
5167 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5168 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5169 		}
5170 	}
5171 
5172 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5173 }
5174 
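/* Program the pipe and queue priority of this ring's HQD through the
 * SRBM-selected CP_HQD_* registers.
 */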
5175 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5176 				      struct amdgpu_ring *ring,
5177 				      bool acquire)
5178 {
5179 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5180 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5181 
5182 	mutex_lock(&adev->srbm_mutex);
5183 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5184 
5185 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5186 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5187 
5188 	soc15_grbm_select(adev, 0, 0, 0, 0);
5189 	mutex_unlock(&adev->srbm_mutex);
5190 }
5191 
5192 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5193 					       enum drm_sched_priority priority)
5194 {
5195 	struct amdgpu_device *adev = ring->adev;
5196 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5197 
5198 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5199 		return;
5200 
5201 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5202 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5203 }
5204 
5205 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5206 {
5207 	struct amdgpu_device *adev = ring->adev;
5208 
5209 	/* XXX check if swapping is necessary on BE */
5210 	if (ring->use_doorbell) {
5211 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5212 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5213 	} else {
5214 		BUG(); /* only DOORBELL method supported on gfx9 now */
5215 	}
5216 }
5217 
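/* KIQ fence: write the 32-bit sequence number to memory with WRITE_DATA and,
 * if an interrupt is requested, poke CPC_INT_STATUS to raise it.
 */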
5218 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5219 					 u64 seq, unsigned int flags)
5220 {
5221 	struct amdgpu_device *adev = ring->adev;
5222 
5223 	/* we only allocate 32bit for each seq wb address */
5224 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5225 
5226 	/* write fence seq to the "addr" */
5227 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5228 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5229 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5230 	amdgpu_ring_write(ring, lower_32_bits(addr));
5231 	amdgpu_ring_write(ring, upper_32_bits(addr));
5232 	amdgpu_ring_write(ring, lower_32_bits(seq));
5233 
5234 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5235 		/* set register to trigger INT */
5236 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5237 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5238 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5239 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5240 		amdgpu_ring_write(ring, 0);
5241 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5242 	}
5243 }
5244 
5245 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5246 {
5247 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5248 	amdgpu_ring_write(ring, 0);
5249 }
5250 
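/* Write a zeroed CE metadata payload into the CSA so the CP sees valid
 * preamble state (emitted only under SR-IOV).
 */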
5251 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5252 {
5253 	struct v9_ce_ib_state ce_payload = {0};
5254 	uint64_t csa_addr;
5255 	int cnt;
5256 
5257 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5258 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5259 
5260 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5261 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5262 				 WRITE_DATA_DST_SEL(8) |
5263 				 WR_CONFIRM) |
5264 				 WRITE_DATA_CACHE_POLICY(0));
5265 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5266 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5267 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5268 }
5269 
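/* Write a DE metadata payload into the CSA, pointing the GDS backup address
 * just past the CSA, for CP state save/restore on preemption.
 */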
5270 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5271 {
5272 	struct v9_de_ib_state de_payload = {0};
5273 	uint64_t csa_addr, gds_addr;
5274 	int cnt;
5275 
5276 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5277 	gds_addr = csa_addr + 4096;
5278 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5279 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5280 
5281 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5282 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5283 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5284 				 WRITE_DATA_DST_SEL(8) |
5285 				 WR_CONFIRM) |
5286 				 WRITE_DATA_CACHE_POLICY(0));
5287 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5288 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5289 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5290 }
5291 
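/* Emit a FRAME_CONTROL packet marking the start (cmd 0) or end (cmd 1) of a
 * frame.
 */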
5292 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5293 {
5294 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5295 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5296 }
5297 
5298 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5299 {
5300 	uint32_t dw2 = 0;
5301 
5302 	if (amdgpu_sriov_vf(ring->adev))
5303 		gfx_v9_0_ring_emit_ce_meta(ring);
5304 
5305 	gfx_v9_0_ring_emit_tmz(ring, true);
5306 
5307 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5308 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5309 		/* set load_global_config & load_global_uconfig */
5310 		dw2 |= 0x8001;
5311 		/* set load_cs_sh_regs */
5312 		dw2 |= 0x01000000;
5313 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5314 		dw2 |= 0x10002;
5315 
5316 		/* set load_ce_ram if preamble presented */
5317 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5318 			dw2 |= 0x10000000;
5319 	} else {
5320 		/* still load_ce_ram if this is the first time a preamble is
5321 		 * presented, even though no context switch happens.
5322 		 */
5323 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5324 			dw2 |= 0x10000000;
5325 	}
5326 
5327 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5328 	amdgpu_ring_write(ring, dw2);
5329 	amdgpu_ring_write(ring, 0);
5330 }
5331 
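/* Emit a COND_EXEC packet with a placeholder skip count (0x55aa55aa); the
 * real count is patched in later, once the size of the conditional block is
 * known.
 */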
5332 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5333 {
5334 	unsigned ret;
5335 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5336 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5337 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5338 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5339 	ret = ring->wptr & ring->buf_mask;
5340 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5341 	return ret;
5342 }
5343 
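/* Patch the placeholder left by gfx_v9_0_ring_emit_init_cond_exec with the
 * number of dwords to skip, accounting for a possible ring buffer wrap.
 */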
5344 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5345 {
5346 	unsigned cur;
5347 	BUG_ON(offset > ring->buf_mask);
5348 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5349 
5350 	cur = (ring->wptr & ring->buf_mask) - 1;
5351 	if (likely(cur > offset))
5352 		ring->ring[offset] = cur - offset;
5353 	else
5354 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5355 }
5356 
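/* Emit a COPY_DATA packet that copies a register value into the KIQ
 * read-back slot of the writeback buffer.
 */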
5357 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5358 {
5359 	struct amdgpu_device *adev = ring->adev;
5360 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5361 
5362 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5363 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5364 				(5 << 8) |	/* dst: memory */
5365 				(1 << 20));	/* write confirm */
5366 	amdgpu_ring_write(ring, reg);
5367 	amdgpu_ring_write(ring, 0);
5368 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5369 				kiq->reg_val_offs * 4));
5370 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5371 				kiq->reg_val_offs * 4));
5372 }
5373 
5374 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5375 				    uint32_t val)
5376 {
5377 	uint32_t cmd = 0;
5378 
5379 	switch (ring->funcs->type) {
5380 	case AMDGPU_RING_TYPE_GFX:
5381 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5382 		break;
5383 	case AMDGPU_RING_TYPE_KIQ:
5384 		cmd = (1 << 16); /* no inc addr */
5385 		break;
5386 	default:
5387 		cmd = WR_CONFIRM;
5388 		break;
5389 	}
5390 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5391 	amdgpu_ring_write(ring, cmd);
5392 	amdgpu_ring_write(ring, reg);
5393 	amdgpu_ring_write(ring, 0);
5394 	amdgpu_ring_write(ring, val);
5395 }
5396 
5397 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5398 					uint32_t val, uint32_t mask)
5399 {
5400 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5401 }
5402 
5403 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5404 						  uint32_t reg0, uint32_t reg1,
5405 						  uint32_t ref, uint32_t mask)
5406 {
5407 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5408 	struct amdgpu_device *adev = ring->adev;
5409 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5410 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5411 
5412 	if (fw_version_ok)
5413 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5414 				      ref, mask, 0x20);
5415 	else
5416 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5417 							   ref, mask);
5418 }
5419 
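/* Soft recovery: issue an SQ_CMD that kills the waves belonging to the given
 * VMID, attempting to unwedge a hung ring without a full GPU reset.
 */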
5420 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5421 {
5422 	struct amdgpu_device *adev = ring->adev;
5423 	uint32_t value = 0;
5424 
5425 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5426 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5427 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5428 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5429 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5430 }
5431 
5432 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5433 						 enum amdgpu_interrupt_state state)
5434 {
5435 	switch (state) {
5436 	case AMDGPU_IRQ_STATE_DISABLE:
5437 	case AMDGPU_IRQ_STATE_ENABLE:
5438 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5439 			       TIME_STAMP_INT_ENABLE,
5440 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5441 		break;
5442 	default:
5443 		break;
5444 	}
5445 }
5446 
5447 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5448 						     int me, int pipe,
5449 						     enum amdgpu_interrupt_state state)
5450 {
5451 	u32 mec_int_cntl, mec_int_cntl_reg;
5452 
5453 	/*
5454 	 * amdgpu controls only the first MEC. That's why this function only
5455 	 * handles the setting of interrupts for this specific MEC. All other
5456 	 * pipes' interrupts are set by amdkfd.
5457 	 */
5458 
5459 	if (me == 1) {
5460 		switch (pipe) {
5461 		case 0:
5462 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5463 			break;
5464 		case 1:
5465 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5466 			break;
5467 		case 2:
5468 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5469 			break;
5470 		case 3:
5471 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5472 			break;
5473 		default:
5474 			DRM_DEBUG("invalid pipe %d\n", pipe);
5475 			return;
5476 		}
5477 	} else {
5478 		DRM_DEBUG("invalid me %d\n", me);
5479 		return;
5480 	}
5481 
5482 	switch (state) {
5483 	case AMDGPU_IRQ_STATE_DISABLE:
5484 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5485 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5486 					     TIME_STAMP_INT_ENABLE, 0);
5487 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5488 		break;
5489 	case AMDGPU_IRQ_STATE_ENABLE:
5490 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5491 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5492 					     TIME_STAMP_INT_ENABLE, 1);
5493 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5494 		break;
5495 	default:
5496 		break;
5497 	}
5498 }
5499 
5500 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5501 					     struct amdgpu_irq_src *source,
5502 					     unsigned type,
5503 					     enum amdgpu_interrupt_state state)
5504 {
5505 	switch (state) {
5506 	case AMDGPU_IRQ_STATE_DISABLE:
5507 	case AMDGPU_IRQ_STATE_ENABLE:
5508 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5509 			       PRIV_REG_INT_ENABLE,
5510 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5511 		break;
5512 	default:
5513 		break;
5514 	}
5515 
5516 	return 0;
5517 }
5518 
5519 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5520 					      struct amdgpu_irq_src *source,
5521 					      unsigned type,
5522 					      enum amdgpu_interrupt_state state)
5523 {
5524 	switch (state) {
5525 	case AMDGPU_IRQ_STATE_DISABLE:
5526 	case AMDGPU_IRQ_STATE_ENABLE:
5527 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5528 			       PRIV_INSTR_INT_ENABLE,
5529 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5530 	default:
5531 		break;
5532 	}
5533 
5534 	return 0;
5535 }
5536 
5537 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5538 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5539 			CP_ECC_ERROR_INT_ENABLE, 1)
5540 
5541 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5542 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5543 			CP_ECC_ERROR_INT_ENABLE, 0)
5544 
5545 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5546 					      struct amdgpu_irq_src *source,
5547 					      unsigned type,
5548 					      enum amdgpu_interrupt_state state)
5549 {
5550 	switch (state) {
5551 	case AMDGPU_IRQ_STATE_DISABLE:
5552 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5553 				CP_ECC_ERROR_INT_ENABLE, 0);
5554 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5555 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5556 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5557 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5558 		break;
5559 
5560 	case AMDGPU_IRQ_STATE_ENABLE:
5561 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5562 				CP_ECC_ERROR_INT_ENABLE, 1);
5563 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5564 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5565 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5566 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5567 		break;
5568 	default:
5569 		break;
5570 	}
5571 
5572 	return 0;
5573 }
5574 
5575 
5576 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5577 					    struct amdgpu_irq_src *src,
5578 					    unsigned type,
5579 					    enum amdgpu_interrupt_state state)
5580 {
5581 	switch (type) {
5582 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5583 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5584 		break;
5585 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5586 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5587 		break;
5588 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5589 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5590 		break;
5591 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5592 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5593 		break;
5594 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5595 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5596 		break;
5597 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5598 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5599 		break;
5600 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5601 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5602 		break;
5603 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5604 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5605 		break;
5606 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5607 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5608 		break;
5609 	default:
5610 		break;
5611 	}
5612 	return 0;
5613 }
5614 
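/* EOP interrupt handler: decode me/pipe/queue from the IH ring_id and run
 * fence processing on the matching ring(s).
 */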
5615 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5616 			    struct amdgpu_irq_src *source,
5617 			    struct amdgpu_iv_entry *entry)
5618 {
5619 	int i;
5620 	u8 me_id, pipe_id, queue_id;
5621 	struct amdgpu_ring *ring;
5622 
5623 	DRM_DEBUG("IH: CP EOP\n");
5624 	me_id = (entry->ring_id & 0x0c) >> 2;
5625 	pipe_id = (entry->ring_id & 0x03) >> 0;
5626 	queue_id = (entry->ring_id & 0x70) >> 4;
5627 
5628 	switch (me_id) {
5629 	case 0:
5630 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5631 		break;
5632 	case 1:
5633 	case 2:
5634 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5635 			ring = &adev->gfx.compute_ring[i];
5636 			/* Per-queue interrupt is supported for MEC starting from VI.
5637 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5638 			 */
5639 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5640 				amdgpu_fence_process(ring);
5641 		}
5642 		break;
5643 	}
5644 	return 0;
5645 }
5646 
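/* Common fault handler: signal a scheduler fault on the ring identified by
 * the IH entry's me/pipe/queue.
 */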
5647 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5648 			   struct amdgpu_iv_entry *entry)
5649 {
5650 	u8 me_id, pipe_id, queue_id;
5651 	struct amdgpu_ring *ring;
5652 	int i;
5653 
5654 	me_id = (entry->ring_id & 0x0c) >> 2;
5655 	pipe_id = (entry->ring_id & 0x03) >> 0;
5656 	queue_id = (entry->ring_id & 0x70) >> 4;
5657 
5658 	switch (me_id) {
5659 	case 0:
5660 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5661 		break;
5662 	case 1:
5663 	case 2:
5664 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5665 			ring = &adev->gfx.compute_ring[i];
5666 			if (ring->me == me_id && ring->pipe == pipe_id &&
5667 			    ring->queue == queue_id)
5668 				drm_sched_fault(&ring->sched);
5669 		}
5670 		break;
5671 	}
5672 }
5673 
5674 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5675 				 struct amdgpu_irq_src *source,
5676 				 struct amdgpu_iv_entry *entry)
5677 {
5678 	DRM_ERROR("Illegal register access in command stream\n");
5679 	gfx_v9_0_fault(adev, entry);
5680 	return 0;
5681 }
5682 
5683 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5684 				  struct amdgpu_irq_src *source,
5685 				  struct amdgpu_iv_entry *entry)
5686 {
5687 	DRM_ERROR("Illegal instruction in command stream\n");
5688 	gfx_v9_0_fault(adev, entry);
5689 	return 0;
5690 }
5691 
5692 
5693 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5694 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5695 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5696 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5697 	},
5698 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5699 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5700 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5701 	},
5702 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5703 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5704 	  0, 0
5705 	},
5706 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5707 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5708 	  0, 0
5709 	},
5710 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5711 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5712 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5713 	},
5714 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5715 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5716 	  0, 0
5717 	},
5718 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5719 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5720 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5721 	},
5722 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5723 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5724 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5725 	},
5726 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5727 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5728 	  0, 0
5729 	},
5730 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5731 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5732 	  0, 0
5733 	},
5734 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5735 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5736 	  0, 0
5737 	},
5738 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5739 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5740 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5741 	},
5742 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5743 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5744 	  0, 0
5745 	},
5746 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5747 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5748 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5749 	},
5750 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5751 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5752 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5753 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5754 	},
5755 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5756 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5757 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5758 	  0, 0
5759 	},
5760 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5761 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5762 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5763 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5764 	},
5765 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5766 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5767 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5768 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5769 	},
5770 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5771 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5772 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5773 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5774 	},
5775 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5776 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5777 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5778 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5779 	},
5780 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5781 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5782 	  0, 0
5783 	},
5784 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5785 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5786 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5787 	},
5788 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5789 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5790 	  0, 0
5791 	},
5792 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5793 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5794 	  0, 0
5795 	},
5796 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5797 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5798 	  0, 0
5799 	},
5800 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5801 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5802 	  0, 0
5803 	},
5804 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5805 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5806 	  0, 0
5807 	},
5808 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5809 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5810 	  0, 0
5811 	},
5812 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5813 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5814 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5815 	},
5816 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5817 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5818 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5819 	},
5820 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5821 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5822 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5823 	},
5824 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5825 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5826 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5827 	},
5828 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5829 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5830 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5831 	},
5832 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5833 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5834 	  0, 0
5835 	},
5836 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5837 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5838 	  0, 0
5839 	},
5840 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5841 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5842 	  0, 0
5843 	},
5844 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5845 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5846 	  0, 0
5847 	},
5848 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5849 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5850 	  0, 0
5851 	},
5852 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5853 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5854 	  0, 0
5855 	},
5856 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5857 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5858 	  0, 0
5859 	},
5860 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5861 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5862 	  0, 0
5863 	},
5864 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5865 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5866 	  0, 0
5867 	},
5868 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5869 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5870 	  0, 0
5871 	},
5872 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5873 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5874 	  0, 0
5875 	},
5876 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5877 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5878 	  0, 0
5879 	},
5880 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5881 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5882 	  0, 0
5883 	},
5884 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5885 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5886 	  0, 0
5887 	},
5888 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5889 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5890 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5891 	},
5892 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5893 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5894 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5895 	},
5896 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5897 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5898 	  0, 0
5899 	},
5900 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5901 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5902 	  0, 0
5903 	},
5904 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5905 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5906 	  0, 0
5907 	},
5908 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5909 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5910 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5911 	},
5912 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5913 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5914 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5915 	},
5916 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5917 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5918 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5919 	},
5920 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5921 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5922 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5923 	},
5924 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5925 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5926 	  0, 0
5927 	},
5928 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5929 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5930 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5931 	},
5932 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5933 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5934 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5935 	},
5936 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5937 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5938 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5939 	},
5940 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5941 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5942 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5943 	},
5944 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5945 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5946 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5947 	},
5948 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5949 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5950 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5951 	},
5952 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5953 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5954 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5955 	},
5956 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5957 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5958 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5959 	},
5960 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5961 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5962 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5963 	},
5964 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5965 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5966 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5967 	},
5968 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5969 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5970 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5971 	},
5972 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5973 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5974 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5975 	},
5976 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5977 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5978 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5979 	},
5980 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5981 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5982 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5983 	},
5984 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5985 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5986 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5987 	},
5988 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5989 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5990 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5991 	},
5992 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5993 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5994 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5995 	},
5996 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5997 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5998 	  0, 0
5999 	},
6000 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6001 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6002 	  0, 0
6003 	},
6004 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6005 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6006 	  0, 0
6007 	},
6008 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6009 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6010 	  0, 0
6011 	},
6012 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6013 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6014 	  0, 0
6015 	},
6016 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6017 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6018 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6019 	},
6020 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6021 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6022 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6023 	},
6024 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6025 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6026 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6027 	},
6028 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6029 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6030 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6031 	},
6032 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6033 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6034 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6035 	},
6036 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6037 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6038 	  0, 0
6039 	},
6040 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6041 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6042 	  0, 0
6043 	},
6044 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6045 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6046 	  0, 0
6047 	},
6048 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6049 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6050 	  0, 0
6051 	},
6052 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6053 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6054 	  0, 0
6055 	},
6056 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6057 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6058 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6059 	},
6060 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6061 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6062 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6063 	},
6064 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6065 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6066 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6067 	},
6068 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6069 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6070 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6071 	},
6072 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6073 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6074 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6075 	},
6076 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6077 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6078 	  0, 0
6079 	},
6080 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6081 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6082 	  0, 0
6083 	},
6084 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6085 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6086 	  0, 0
6087 	},
6088 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6089 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6090 	  0, 0
6091 	},
6092 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6093 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6094 	  0, 0
6095 	},
6096 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6097 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6098 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6099 	},
6100 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6101 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6102 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6103 	},
6104 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6105 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6106 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6107 	},
6108 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6109 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6110 	  0, 0
6111 	},
6112 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6113 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6114 	  0, 0
6115 	},
6116 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6117 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6118 	  0, 0
6119 	},
6120 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6121 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6122 	  0, 0
6123 	},
6124 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6125 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6126 	  0, 0
6127 	},
6128 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6129 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6130 	  0, 0
6131 	}
6132 };
6133 
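/*
 * Inject a RAS error into a GFX sub-block: validate the requested
 * sub-block and error type against the ras_gfx_subblocks table, then
 * hand the request to the PSP RAS TA via psp_ras_trigger_error().
 */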
6134 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6135 				     void *inject_if)
6136 {
6137 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6138 	int ret;
6139 	struct ta_ras_trigger_error_input block_info = { 0 };
6140 
6141 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6142 		return -EINVAL;
6143 
6144 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6145 		return -EINVAL;
6146 
6147 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6148 		return -EPERM;
6149 
6150 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6151 	      info->head.type)) {
6152 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6153 			ras_gfx_subblocks[info->head.sub_block_index].name,
6154 			info->head.type);
6155 		return -EPERM;
6156 	}
6157 
6158 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6159 	      info->head.type)) {
6160 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6161 			ras_gfx_subblocks[info->head.sub_block_index].name,
6162 			info->head.type);
6163 		return -EPERM;
6164 	}
6165 
6166 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6167 	block_info.sub_block_index =
6168 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6169 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6170 	block_info.address = info->address;
6171 	block_info.value = info->value;
6172 
6173 	mutex_lock(&adev->grbm_idx_mutex);
6174 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6175 	mutex_unlock(&adev->grbm_idx_mutex);
6176 
6177 	return ret;
6178 }
6179 
6180 static const char *vml2_mems[] = {
6181 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6182 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6183 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6184 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6185 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6186 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6187 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6188 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6189 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6190 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6191 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6192 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6193 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6194 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6195 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6196 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6197 };
6198 
6199 static const char *vml2_walker_mems[] = {
6200 	"UTC_VML2_CACHE_PDE0_MEM0",
6201 	"UTC_VML2_CACHE_PDE0_MEM1",
6202 	"UTC_VML2_CACHE_PDE1_MEM0",
6203 	"UTC_VML2_CACHE_PDE1_MEM1",
6204 	"UTC_VML2_CACHE_PDE2_MEM0",
6205 	"UTC_VML2_CACHE_PDE2_MEM1",
6206 	"UTC_VML2_RDIF_LOG_FIFO",
6207 };
6208 
6209 static const char *atc_l2_cache_2m_mems[] = {
6210 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6211 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6212 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6213 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6214 };
6215 
6216 static const char *atc_l2_cache_4k_mems[] = {
6217 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6218 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6219 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6220 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6221 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6222 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6223 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6224 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6225 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6226 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6227 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6228 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6229 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6230 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6231 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6232 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6233 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6234 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6235 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6236 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6237 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6238 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6239 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6240 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6241 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6242 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6243 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6244 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6245 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6246 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6247 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6248 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6249 };
6250 
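/*
 * Read the UTC EDC counters (VML2, VML2 walker, ATC L2 2M/4K caches)
 * for every memory instance and accumulate the correctable (SEC) and
 * uncorrectable (DED) error counts into err_data.
 */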
6251 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6252 					 struct ras_err_data *err_data)
6253 {
6254 	uint32_t i, data;
6255 	uint32_t sec_count, ded_count;
6256 
6257 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6258 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6259 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6260 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6261 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6262 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6263 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6264 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6265 
6266 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6267 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6268 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6269 
6270 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6271 		if (sec_count) {
6272 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6273 				 vml2_mems[i], sec_count);
6274 			err_data->ce_count += sec_count;
6275 		}
6276 
6277 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6278 		if (ded_count) {
6279 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6280 				 vml2_mems[i], ded_count);
6281 			err_data->ue_count += ded_count;
6282 		}
6283 	}
6284 
6285 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6286 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6287 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6288 
6289 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6290 						SEC_COUNT);
6291 		if (sec_count) {
6292 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6293 				 vml2_walker_mems[i], sec_count);
6294 			err_data->ce_count += sec_count;
6295 		}
6296 
6297 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6298 						DED_COUNT);
6299 		if (ded_count) {
6300 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6301 				 vml2_walker_mems[i], ded_count);
6302 			err_data->ue_count += ded_count;
6303 		}
6304 	}
6305 
6306 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6307 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6308 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6309 
6310 		sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6311 		if (sec_count) {
6312 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6313 				 atc_l2_cache_2m_mems[i], sec_count);
6314 			err_data->ce_count += sec_count;
6315 		}
6316 	}
6317 
6318 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6319 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6320 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6321 
6322 		sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6323 		if (sec_count) {
6324 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6325 				 atc_l2_cache_4k_mems[i], sec_count);
6326 			err_data->ce_count += sec_count;
6327 		}
6328 
6329 		ded_count = (data & 0x00018000L) >> 0xf; /* DED count: bits 16:15 */
6330 		if (ded_count) {
6331 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6332 				 atc_l2_cache_4k_mems[i], ded_count);
6333 			err_data->ue_count += ded_count;
6334 		}
6335 	}
6336 
6337 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6338 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6339 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6340 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6341 
6342 	return 0;
6343 }
6344 
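/*
 * Decode one EDC counter register value: match the register against
 * gfx_v9_0_ras_fields[] and add any SEC/DED counts found in it to the
 * running totals.
 */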
6345 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6346 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6347 	uint32_t *sec_count, uint32_t *ded_count)
6348 {
6349 	uint32_t i;
6350 	uint32_t sec_cnt, ded_cnt;
6351 
6352 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6353 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6354 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6355 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6356 			continue;
6357 
6358 		sec_cnt = (value &
6359 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6360 				gfx_v9_0_ras_fields[i].sec_count_shift;
6361 		if (sec_cnt) {
6362 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6363 				gfx_v9_0_ras_fields[i].name,
6364 				se_id, inst_id,
6365 				sec_cnt);
6366 			*sec_count += sec_cnt;
6367 		}
6368 
6369 		ded_cnt = (value &
6370 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6371 				gfx_v9_0_ras_fields[i].ded_count_shift;
6372 		if (ded_cnt) {
6373 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6374 				gfx_v9_0_ras_fields[i].name,
6375 				se_id, inst_id,
6376 				ded_cnt);
6377 			*ded_count += ded_cnt;
6378 		}
6379 	}
6380 
6381 	return 0;
6382 }
6383 
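/*
 * Clear all GFX EDC counters: the per-SE/instance counters are cleared
 * by reading them back, and the UTC counters are zeroed and then read
 * back for each memory instance.
 */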
6384 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6385 {
6386 	int i, j, k;
6387 
6388 	/* read back registers to clear the counters */
6389 	mutex_lock(&adev->grbm_idx_mutex);
6390 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6391 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6392 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6393 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6394 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6395 			}
6396 		}
6397 	}
6398 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); /* broadcast to all SEs/SHs/instances */
6399 	mutex_unlock(&adev->grbm_idx_mutex);
6400 
6401 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6402 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6403 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6404 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6405 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6406 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6407 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6408 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6409 
6410 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6411 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6412 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6413 	}
6414 
6415 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6416 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6417 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6418 	}
6419 
6420 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6421 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6422 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6423 	}
6424 
6425 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6426 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6427 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6428 	}
6429 
6430 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6431 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6432 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6433 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6434 }
6435 
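/*
 * Walk every EDC counter register across all shader engines and
 * instances, tally SEC/DED counts into ras_error_status, then add the
 * UTC EDC status on top.
 */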
6436 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6437 					  void *ras_error_status)
6438 {
6439 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6440 	uint32_t sec_count = 0, ded_count = 0;
6441 	uint32_t i, j, k;
6442 	uint32_t reg_value;
6443 
6444 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6445 		return -EINVAL;
6446 
6447 	err_data->ue_count = 0;
6448 	err_data->ce_count = 0;
6449 
6450 	mutex_lock(&adev->grbm_idx_mutex);
6451 
6452 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6453 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6454 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6455 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6456 				reg_value =
6457 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6458 				if (reg_value)
6459 					gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6460 							j, k, reg_value,
6461 							&sec_count, &ded_count);
6462 			}
6463 		}
6464 	}
6465 
6466 	err_data->ce_count += sec_count;
6467 	err_data->ue_count += ded_count;
6468 
6469 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6470 	mutex_unlock(&adev->grbm_idx_mutex);
6471 
6472 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6473 
6474 	return 0;
6475 }
6476 
6477 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6478 	.name = "gfx_v9_0",
6479 	.early_init = gfx_v9_0_early_init,
6480 	.late_init = gfx_v9_0_late_init,
6481 	.sw_init = gfx_v9_0_sw_init,
6482 	.sw_fini = gfx_v9_0_sw_fini,
6483 	.hw_init = gfx_v9_0_hw_init,
6484 	.hw_fini = gfx_v9_0_hw_fini,
6485 	.suspend = gfx_v9_0_suspend,
6486 	.resume = gfx_v9_0_resume,
6487 	.is_idle = gfx_v9_0_is_idle,
6488 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6489 	.soft_reset = gfx_v9_0_soft_reset,
6490 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6491 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6492 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6493 };
6494 
6495 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6496 	.type = AMDGPU_RING_TYPE_GFX,
6497 	.align_mask = 0xff,
6498 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6499 	.support_64bit_ptrs = true,
6500 	.vmhub = AMDGPU_GFXHUB_0,
6501 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6502 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6503 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6504 	.emit_frame_size = /* 242 maximum in total, if 16 IBs */
6505 		5 +  /* COND_EXEC */
6506 		7 +  /* PIPELINE_SYNC */
6507 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6508 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6509 		2 + /* VM_FLUSH */
6510 		8 +  /* FENCE for VM_FLUSH */
6511 		20 + /* GDS switch */
6512 		4 + /* double SWITCH_BUFFER,
6513 		       the first COND_EXEC jumps to the place just
6514 		       prior to this double SWITCH_BUFFER */
6515 		5 + /* COND_EXEC */
6516 		7 + /* HDP_flush */
6517 		4 + /* VGT_flush */
6518 		14 + /* CE_META */
6519 		31 + /* DE_META */
6520 		3 + /* CNTX_CTRL */
6521 		5 + /* HDP_INVL */
6522 		8 + 8 + /* FENCE x2 */
6523 		2, /* SWITCH_BUFFER */
6524 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6525 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6526 	.emit_fence = gfx_v9_0_ring_emit_fence,
6527 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6528 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6529 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6530 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6531 	.test_ring = gfx_v9_0_ring_test_ring,
6532 	.test_ib = gfx_v9_0_ring_test_ib,
6533 	.insert_nop = amdgpu_ring_insert_nop,
6534 	.pad_ib = amdgpu_ring_generic_pad_ib,
6535 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6536 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6537 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6538 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6539 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6540 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6541 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6542 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6543 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6544 };
6545 
6546 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6547 	.type = AMDGPU_RING_TYPE_COMPUTE,
6548 	.align_mask = 0xff,
6549 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6550 	.support_64bit_ptrs = true,
6551 	.vmhub = AMDGPU_GFXHUB_0,
6552 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6553 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6554 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6555 	.emit_frame_size =
6556 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6557 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6558 		5 + /* hdp invalidate */
6559 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6560 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6561 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6562 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6563 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6564 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6565 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6566 	.emit_fence = gfx_v9_0_ring_emit_fence,
6567 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6568 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6569 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6570 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6571 	.test_ring = gfx_v9_0_ring_test_ring,
6572 	.test_ib = gfx_v9_0_ring_test_ib,
6573 	.insert_nop = amdgpu_ring_insert_nop,
6574 	.pad_ib = amdgpu_ring_generic_pad_ib,
6575 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6576 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6577 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6578 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6579 };
6580 
6581 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6582 	.type = AMDGPU_RING_TYPE_KIQ,
6583 	.align_mask = 0xff,
6584 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6585 	.support_64bit_ptrs = true,
6586 	.vmhub = AMDGPU_GFXHUB_0,
6587 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6588 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6589 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6590 	.emit_frame_size =
6591 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6592 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6593 		5 + /* hdp invalidate */
6594 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6595 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6596 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6597 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6598 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6599 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6600 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6601 	.test_ring = gfx_v9_0_ring_test_ring,
6602 	.insert_nop = amdgpu_ring_insert_nop,
6603 	.pad_ib = amdgpu_ring_generic_pad_ib,
6604 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6605 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6606 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6607 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6608 };
6609 
6610 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6611 {
6612 	int i;
6613 
6614 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6615 
6616 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6617 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6618 
6619 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6620 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6621 }
6622 
6623 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6624 	.set = gfx_v9_0_set_eop_interrupt_state,
6625 	.process = gfx_v9_0_eop_irq,
6626 };
6627 
6628 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6629 	.set = gfx_v9_0_set_priv_reg_fault_state,
6630 	.process = gfx_v9_0_priv_reg_irq,
6631 };
6632 
6633 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6634 	.set = gfx_v9_0_set_priv_inst_fault_state,
6635 	.process = gfx_v9_0_priv_inst_irq,
6636 };
6637 
6638 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6639 	.set = gfx_v9_0_set_cp_ecc_error_state,
6640 	.process = amdgpu_gfx_cp_ecc_error_irq,
6641 };
6642 
6643 
6644 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6645 {
6646 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6647 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6648 
6649 	adev->gfx.priv_reg_irq.num_types = 1;
6650 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6651 
6652 	adev->gfx.priv_inst_irq.num_types = 1;
6653 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6654 
6655 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6656 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6657 }
6658 
6659 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6660 {
6661 	switch (adev->asic_type) {
6662 	case CHIP_VEGA10:
6663 	case CHIP_VEGA12:
6664 	case CHIP_VEGA20:
6665 	case CHIP_RAVEN:
6666 	case CHIP_ARCTURUS:
6667 	case CHIP_RENOIR:
6668 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6669 		break;
6670 	default:
6671 		break;
6672 	}
6673 }
6674 
6675 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6676 {
6677 	/* init asic gds info */
6678 	switch (adev->asic_type) {
6679 	case CHIP_VEGA10:
6680 	case CHIP_VEGA12:
6681 	case CHIP_VEGA20:
6682 		adev->gds.gds_size = 0x10000;
6683 		break;
6684 	case CHIP_RAVEN:
6685 	case CHIP_ARCTURUS:
6686 		adev->gds.gds_size = 0x1000;
6687 		break;
6688 	default:
6689 		adev->gds.gds_size = 0x10000;
6690 		break;
6691 	}
6692 
6693 	switch (adev->asic_type) {
6694 	case CHIP_VEGA10:
6695 	case CHIP_VEGA20:
6696 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6697 		break;
6698 	case CHIP_VEGA12:
6699 		adev->gds.gds_compute_max_wave_id = 0x27f;
6700 		break;
6701 	case CHIP_RAVEN:
6702 		if (adev->rev_id >= 0x8)
6703 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6704 		else
6705 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6706 		break;
6707 	case CHIP_ARCTURUS:
6708 		adev->gds.gds_compute_max_wave_id = 0xfff;
6709 		break;
6710 	default:
6711 		/* this really depends on the chip */
6712 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6713 		break;
6714 	}
6715 
6716 	adev->gds.gws_size = 64;
6717 	adev->gds.oa_size = 16;
6718 }
6719 
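/*
 * Write the given bitmap into the INACTIVE_CUS field of
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */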
6720 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6721 						 u32 bitmap)
6722 {
6723 	u32 data;
6724 
6725 	if (!bitmap)
6726 		return;
6727 
6728 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6729 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6730 
6731 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6732 }
6733 
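/*
 * Return the active CU bitmap for the currently selected SE/SH: the
 * union of the fused-off and user-disabled INACTIVE_CUS fields,
 * inverted and masked to max_cu_per_sh bits.
 */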
6734 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6735 {
6736 	u32 data, mask;
6737 
6738 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6739 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6740 
6741 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6742 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6743 
6744 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6745 
6746 	return (~data) & mask;
6747 }
6748 
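/*
 * Fill cu_info with the per-SE/SH active CU bitmaps, the total number
 * of active CUs and the always-on CU mask.
 */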
6749 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6750 				 struct amdgpu_cu_info *cu_info)
6751 {
6752 	int i, j, k, counter, active_cu_number = 0;
6753 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6754 	unsigned disable_masks[4 * 4];
6755 
6756 	if (!adev || !cu_info)
6757 		return -EINVAL;
6758 
6759 	/*
6760 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6761 	 */
6762 	if (adev->gfx.config.max_shader_engines *
6763 		adev->gfx.config.max_sh_per_se > 16)
6764 		return -EINVAL;
6765 
6766 	amdgpu_gfx_parse_disable_cu(disable_masks,
6767 				    adev->gfx.config.max_shader_engines,
6768 				    adev->gfx.config.max_sh_per_se);
6769 
6770 	mutex_lock(&adev->grbm_idx_mutex);
6771 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6772 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6773 			mask = 1;
6774 			ao_bitmap = 0;
6775 			counter = 0;
6776 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6777 			gfx_v9_0_set_user_cu_inactive_bitmap(
6778 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6779 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6780 
6781 			/*
6782 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6783 			 * a 4x4 array, which suits Vega ASICs and their
6784 			 * 4*2 SE/SH layout.
6785 			 * Arcturus, however, uses an 8*1 SE/SH layout.
6786 			 * To minimize the impact, we map it onto the current
6787 			 * bitmap array as below:
6788 			 *    SE4,SH0 --> bitmap[0][1]
6789 			 *    SE5,SH0 --> bitmap[1][1]
6790 			 *    SE6,SH0 --> bitmap[2][1]
6791 			 *    SE7,SH0 --> bitmap[3][1]
6792 			 */
6793 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6794 
6795 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6796 				if (bitmap & mask) {
6797 					if (counter < adev->gfx.config.max_cu_per_sh)
6798 						ao_bitmap |= mask;
6799 					counter++;
6800 				}
6801 				mask <<= 1;
6802 			}
6803 			active_cu_number += counter;
6804 			if (i < 2 && j < 2)
6805 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6806 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6807 		}
6808 	}
6809 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6810 	mutex_unlock(&adev->grbm_idx_mutex);
6811 
6812 	cu_info->number = active_cu_number;
6813 	cu_info->ao_cu_mask = ao_cu_mask;
6814 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6815 
6816 	return 0;
6817 }
6818 
6819 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6820 {
6821 	.type = AMD_IP_BLOCK_TYPE_GFX,
6822 	.major = 9,
6823 	.minor = 0,
6824 	.rev = 0,
6825 	.funcs = &gfx_v9_0_ip_funcs,
6826 };
6827