xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision b9df3997)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
134 enum ta_ras_gfx_subblock {
135 	/*CPC*/
136 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPC_UCODE,
139 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	/* CPF*/
147 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 	TA_RAS_BLOCK__GFX_CPF_TAG,
151 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 	/* CPG*/
153 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 	TA_RAS_BLOCK__GFX_CPG_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 	/* GDS*/
159 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	/* SPI*/
167 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 	/* SQ*/
169 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 	/* SQC (3 ranges)*/
176 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 	/* SQC range 0*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	/* SQC range 1*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	/* SQC range 2*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 	/* TA*/
219 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	/* TCA*/
227 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	/* TCC (5 sub-ranges)*/
232 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 	/* TCC range 0*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	/* TCC range 1*/
245 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	/* TCC range 2*/
251 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	/* TCC range 3*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	/* TCC range 4*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 	/* TCI*/
277 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 	/* TCP*/
279 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	/* TD*/
289 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	/* EA (3 sub-ranges)*/
295 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 	/* EA range 0*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	/* EA range 1*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	/* EA range 2*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 	/* UTC VM L2 bank*/
326 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 	/* UTC VM walker*/
328 	TA_RAS_BLOCK__UTC_VML2_WALKER,
329 	/* UTC ATC L2 2MB cache*/
330 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 	/* UTC ATC L2 4KB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 	TA_RAS_BLOCK__GFX_MAX
334 };
335 
336 struct ras_gfx_subblock {
337 	unsigned char *name;
338 	int ta_subblock;
339 	int hw_supported_error_type;
340 	int sw_supported_error_type;
341 };
342 
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345 		#subblock,                                                     \
346 		TA_RAS_BLOCK__##subblock,                                      \
347 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349 	}
350 
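/*
 * Each AMDGPU_RAS_SUB_BLOCK() entry below maps an AMDGPU_RAS_BLOCK__GFX_*
 * index to its TA_RAS_BLOCK__* counterpart and packs two small bitmasks of
 * supported error types. From the shifts in the macro, the a..d flags appear
 * to form the hardware-supported mask (bits 0..3) and g/e/h/f the
 * software-supported mask (bits 0..3 respectively); the per-bit error-type
 * meaning follows the AMDGPU_RAS_ERROR__* definitions in amdgpu_ras.h.
 */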
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369 			     0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 			     0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382 			     0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 			     0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 			     0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392 			     1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 			     0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436 			     1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457 			     0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 			     0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464 			     0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500 
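/*
 * Golden register settings: each SOC15_REG_GOLDEN_VALUE() entry is a
 * (register, AND mask, value) triple applied by
 * soc15_program_register_sequence() as a read-modify-write. Roughly, the
 * bits covered by the AND mask are cleared and the corresponding bits of the
 * value are ORed back in, while an all-ones mask simply overwrites the
 * register.
 */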
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
523 };
524 
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546 
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561 
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
588 };
589 
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600 
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623 
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646 
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666 
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
682 };
683 
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 };
695 
696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
697 {
698 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
699 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
700 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 };
707 
708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
709 {
710 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
711 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
712 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 };
719 
720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
724 
725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
730                                  struct amdgpu_cu_info *cu_info);
731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
736 					  void *ras_error_status);
737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
738 				     void *inject_if);
739 
740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
741 {
742 	switch (adev->asic_type) {
743 	case CHIP_VEGA10:
744 		soc15_program_register_sequence(adev,
745 						golden_settings_gc_9_0,
746 						ARRAY_SIZE(golden_settings_gc_9_0));
747 		soc15_program_register_sequence(adev,
748 						golden_settings_gc_9_0_vg10,
749 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
750 		break;
751 	case CHIP_VEGA12:
752 		soc15_program_register_sequence(adev,
753 						golden_settings_gc_9_2_1,
754 						ARRAY_SIZE(golden_settings_gc_9_2_1));
755 		soc15_program_register_sequence(adev,
756 						golden_settings_gc_9_2_1_vg12,
757 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
758 		break;
759 	case CHIP_VEGA20:
760 		soc15_program_register_sequence(adev,
761 						golden_settings_gc_9_0,
762 						ARRAY_SIZE(golden_settings_gc_9_0));
763 		soc15_program_register_sequence(adev,
764 						golden_settings_gc_9_0_vg20,
765 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
766 		break;
767 	case CHIP_ARCTURUS:
768 		soc15_program_register_sequence(adev,
769 						golden_settings_gc_9_4_1_arct,
770 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
771 		break;
772 	case CHIP_RAVEN:
773 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
774 						ARRAY_SIZE(golden_settings_gc_9_1));
775 		if (adev->rev_id >= 8)
776 			soc15_program_register_sequence(adev,
777 							golden_settings_gc_9_1_rv2,
778 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
779 		else
780 			soc15_program_register_sequence(adev,
781 							golden_settings_gc_9_1_rv1,
782 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
783 		break;
784 	case CHIP_RENOIR:
785 		soc15_program_register_sequence(adev,
786 						golden_settings_gc_9_1_rn,
787 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
788 		return; /* Renoir doesn't need the common golden settings */
789 	default:
790 		break;
791 	}
792 
793 	if (adev->asic_type != CHIP_ARCTURUS)
794 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
795 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
796 }
797 
798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
799 {
800 	adev->gfx.scratch.num_reg = 8;
801 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
802 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
803 }
804 
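/*
 * Ring helper: emit a PACKET3_WRITE_DATA packet that stores 'val' into the
 * MMIO register 'reg' (DST_SEL 0). 'eng_sel' picks the CP micro engine that
 * performs the write and 'wc' requests a write-confirm so following packets
 * only execute once the write has landed.
 */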
805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
806 				       bool wc, uint32_t reg, uint32_t val)
807 {
808 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
809 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
810 				WRITE_DATA_DST_SEL(0) |
811 				(wc ? WR_CONFIRM : 0));
812 	amdgpu_ring_write(ring, reg);
813 	amdgpu_ring_write(ring, 0);
814 	amdgpu_ring_write(ring, val);
815 }
816 
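/*
 * Ring helper: emit a PACKET3_WAIT_REG_MEM packet that polls either a
 * register (mem_space == 0, addr0 is the register offset) or a memory
 * location (mem_space == 1, addr0/addr1 are the low/high address words)
 * until (value & mask) == ref; WAIT_REG_MEM_FUNCTION(3) selects the "equal"
 * compare and 'inv' is the poll interval.
 */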
817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
818 				  int mem_space, int opt, uint32_t addr0,
819 				  uint32_t addr1, uint32_t ref, uint32_t mask,
820 				  uint32_t inv)
821 {
822 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
823 	amdgpu_ring_write(ring,
824 				 /* memory (1) or register (0) */
825 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
826 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
827 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
828 				 WAIT_REG_MEM_ENGINE(eng_sel)));
829 
830 	if (mem_space)
831 		BUG_ON(addr0 & 0x3); /* Dword align */
832 	amdgpu_ring_write(ring, addr0);
833 	amdgpu_ring_write(ring, addr1);
834 	amdgpu_ring_write(ring, ref);
835 	amdgpu_ring_write(ring, mask);
836 	amdgpu_ring_write(ring, inv); /* poll interval */
837 }
838 
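/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, then write
 * 0xDEADBEEF to it through the ring with a SET_UCONFIG_REG packet and poll
 * the register until the pattern appears or adev->usec_timeout expires.
 */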
839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
840 {
841 	struct amdgpu_device *adev = ring->adev;
842 	uint32_t scratch;
843 	uint32_t tmp = 0;
844 	unsigned i;
845 	int r;
846 
847 	r = amdgpu_gfx_scratch_get(adev, &scratch);
848 	if (r)
849 		return r;
850 
851 	WREG32(scratch, 0xCAFEDEAD);
852 	r = amdgpu_ring_alloc(ring, 3);
853 	if (r)
854 		goto error_free_scratch;
855 
856 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
857 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
858 	amdgpu_ring_write(ring, 0xDEADBEEF);
859 	amdgpu_ring_commit(ring);
860 
861 	for (i = 0; i < adev->usec_timeout; i++) {
862 		tmp = RREG32(scratch);
863 		if (tmp == 0xDEADBEEF)
864 			break;
865 		udelay(1);
866 	}
867 
868 	if (i >= adev->usec_timeout)
869 		r = -ETIMEDOUT;
870 
871 error_free_scratch:
872 	amdgpu_gfx_scratch_free(adev, scratch);
873 	return r;
874 }
875 
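/*
 * IB test: build a five-dword indirect buffer whose WRITE_DATA packet stores
 * 0xDEADBEEF into a writeback (WB) slot in system memory, schedule it on the
 * ring and wait for its fence; the test passes if the pattern is visible in
 * the WB slot afterwards.
 */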
876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 {
878 	struct amdgpu_device *adev = ring->adev;
879 	struct amdgpu_ib ib;
880 	struct dma_fence *f = NULL;
881 
882 	unsigned index;
883 	uint64_t gpu_addr;
884 	uint32_t tmp;
885 	long r;
886 
887 	r = amdgpu_device_wb_get(adev, &index);
888 	if (r)
889 		return r;
890 
891 	gpu_addr = adev->wb.gpu_addr + (index * 4);
892 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
893 	memset(&ib, 0, sizeof(ib));
894 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
895 	if (r)
896 		goto err1;
897 
898 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
899 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
900 	ib.ptr[2] = lower_32_bits(gpu_addr);
901 	ib.ptr[3] = upper_32_bits(gpu_addr);
902 	ib.ptr[4] = 0xDEADBEEF;
903 	ib.length_dw = 5;
904 
905 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
906 	if (r)
907 		goto err2;
908 
909 	r = dma_fence_wait_timeout(f, false, timeout);
910 	if (r == 0) {
911 		r = -ETIMEDOUT;
912 		goto err2;
913 	} else if (r < 0) {
914 		goto err2;
915 	}
916 
917 	tmp = adev->wb.wb[index];
918 	if (tmp == 0xDEADBEEF)
919 		r = 0;
920 	else
921 		r = -EINVAL;
922 
923 err2:
924 	amdgpu_ib_free(adev, &ib, NULL);
925 	dma_fence_put(f);
926 err1:
927 	amdgpu_device_wb_free(adev, index);
928 	return r;
929 }
930 
931 
932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
933 {
934 	release_firmware(adev->gfx.pfp_fw);
935 	adev->gfx.pfp_fw = NULL;
936 	release_firmware(adev->gfx.me_fw);
937 	adev->gfx.me_fw = NULL;
938 	release_firmware(adev->gfx.ce_fw);
939 	adev->gfx.ce_fw = NULL;
940 	release_firmware(adev->gfx.rlc_fw);
941 	adev->gfx.rlc_fw = NULL;
942 	release_firmware(adev->gfx.mec_fw);
943 	adev->gfx.mec_fw = NULL;
944 	release_firmware(adev->gfx.mec2_fw);
945 	adev->gfx.mec2_fw = NULL;
946 
947 	kfree(adev->gfx.rlc.register_list_format);
948 }
949 
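/*
 * For RLC firmware images with a v2.1 header, pull the extra save/restore
 * list blobs (CNTL, GPM and SRM), their sizes and their ucode/feature
 * versions out of the extended header fields parsed below.
 */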
950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
951 {
952 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
953 
954 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
955 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
956 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
957 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
958 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
959 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
960 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
961 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
962 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
963 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
964 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
965 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
966 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
967 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
968 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
969 }
970 
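/*
 * The me/mec_fw_write_wait flags set below appear to record whether the CP
 * ME/PFP and MEC firmware on this ASIC is recent enough to support the
 * combined register write-and-wait handling used elsewhere in the driver;
 * with older firmware the driver falls back to separate packets.
 */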
971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
972 {
973 	adev->gfx.me_fw_write_wait = false;
974 	adev->gfx.mec_fw_write_wait = false;
975 
976 	switch (adev->asic_type) {
977 	case CHIP_VEGA10:
978 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
979 		    (adev->gfx.me_feature_version >= 42) &&
980 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
981 		    (adev->gfx.pfp_feature_version >= 42))
982 			adev->gfx.me_fw_write_wait = true;
983 
984 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
985 		    (adev->gfx.mec_feature_version >= 42))
986 			adev->gfx.mec_fw_write_wait = true;
987 		break;
988 	case CHIP_VEGA12:
989 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
990 		    (adev->gfx.me_feature_version >= 44) &&
991 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
992 		    (adev->gfx.pfp_feature_version >= 44))
993 			adev->gfx.me_fw_write_wait = true;
994 
995 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
996 		    (adev->gfx.mec_feature_version >= 44))
997 			adev->gfx.mec_fw_write_wait = true;
998 		break;
999 	case CHIP_VEGA20:
1000 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1001 		    (adev->gfx.me_feature_version >= 44) &&
1002 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1003 		    (adev->gfx.pfp_feature_version >= 44))
1004 			adev->gfx.me_fw_write_wait = true;
1005 
1006 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1007 		    (adev->gfx.mec_feature_version >= 44))
1008 			adev->gfx.mec_fw_write_wait = true;
1009 		break;
1010 	case CHIP_RAVEN:
1011 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1012 		    (adev->gfx.me_feature_version >= 42) &&
1013 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1014 		    (adev->gfx.pfp_feature_version >= 42))
1015 			adev->gfx.me_fw_write_wait = true;
1016 
1017 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1018 		    (adev->gfx.mec_feature_version >= 42))
1019 			adev->gfx.mec_fw_write_wait = true;
1020 		break;
1021 	default:
1022 		break;
1023 	}
1024 }
1025 
1026 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1027 {
1028 	switch (adev->asic_type) {
1029 	case CHIP_VEGA10:
1030 	case CHIP_VEGA12:
1031 	case CHIP_VEGA20:
1032 		break;
1033 	case CHIP_RAVEN:
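		/*
		 * Only original Raven is affected here: rev_id >= 0x8 means
		 * Raven2 and device 0x15d8 is Picasso. On original Raven,
		 * GFXOFF stays enabled only when the RLC firmware looks new
		 * enough; otherwise the feature is masked out of pp_feature.
		 */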
1034 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1035 			&& ((adev->gfx.rlc_fw_version != 106 &&
1036 			     adev->gfx.rlc_fw_version < 531) ||
1037 			    (adev->gfx.rlc_fw_version == 53815) ||
1038 			    (adev->gfx.rlc_feature_version < 1) ||
1039 			    !adev->gfx.rlc.is_rlc_v2_1))
1040 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1041 
1042 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1043 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1044 				AMD_PG_SUPPORT_CP |
1045 				AMD_PG_SUPPORT_RLC_SMU_HS;
1046 		break;
1047 	default:
1048 		break;
1049 	}
1050 }
1051 
1052 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1053 					  const char *chip_name)
1054 {
1055 	char fw_name[30];
1056 	int err;
1057 	struct amdgpu_firmware_info *info = NULL;
1058 	const struct common_firmware_header *header = NULL;
1059 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1060 
1061 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1062 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1063 	if (err)
1064 		goto out;
1065 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1066 	if (err)
1067 		goto out;
1068 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1069 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1070 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1071 
1072 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1073 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1074 	if (err)
1075 		goto out;
1076 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1077 	if (err)
1078 		goto out;
1079 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1080 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1081 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1082 
1083 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1084 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1085 	if (err)
1086 		goto out;
1087 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1088 	if (err)
1089 		goto out;
1090 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1091 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1092 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1093 
1094 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1095 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1096 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1097 		info->fw = adev->gfx.pfp_fw;
1098 		header = (const struct common_firmware_header *)info->fw->data;
1099 		adev->firmware.fw_size +=
1100 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1101 
1102 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1103 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1104 		info->fw = adev->gfx.me_fw;
1105 		header = (const struct common_firmware_header *)info->fw->data;
1106 		adev->firmware.fw_size +=
1107 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1108 
1109 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1110 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1111 		info->fw = adev->gfx.ce_fw;
1112 		header = (const struct common_firmware_header *)info->fw->data;
1113 		adev->firmware.fw_size +=
1114 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1115 	}
1116 
1117 out:
1118 	if (err) {
1119 		dev_err(adev->dev,
1120 			"gfx9: Failed to load firmware \"%s\"\n",
1121 			fw_name);
1122 		release_firmware(adev->gfx.pfp_fw);
1123 		adev->gfx.pfp_fw = NULL;
1124 		release_firmware(adev->gfx.me_fw);
1125 		adev->gfx.me_fw = NULL;
1126 		release_firmware(adev->gfx.ce_fw);
1127 		adev->gfx.ce_fw = NULL;
1128 	}
1129 	return err;
1130 }
1131 
1132 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1133 					  const char *chip_name)
1134 {
1135 	char fw_name[30];
1136 	int err;
1137 	struct amdgpu_firmware_info *info = NULL;
1138 	const struct common_firmware_header *header = NULL;
1139 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1140 	unsigned int *tmp = NULL;
1141 	unsigned int i = 0;
1142 	uint16_t version_major;
1143 	uint16_t version_minor;
1144 	uint32_t smu_version;
1145 
1146 	/*
1147 	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1148 	 * instead of picasso_rlc.bin.
1149 	 * Detection:
1150 	 * PCO AM4: PCI revision >= 0xC8 && revision <= 0xCF
1151 	 *          or revision >= 0xD8 && revision <= 0xDF;
1152 	 * anything else is PCO FP5.
1153 	 */
1154 	if (!strcmp(chip_name, "picasso") &&
1155 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1156 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1157 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1158 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1159 		(smu_version >= 0x41e2b))
1160 		/*
1161 		 * SMC is loaded by SBIOS on APUs, so the SMU version can be queried directly.
1162 		 */
1163 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1164 	else
1165 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1166 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1167 	if (err)
1168 		goto out;
1169 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1170 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1171 
1172 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1173 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1174 	if (version_major == 2 && version_minor == 1)
1175 		adev->gfx.rlc.is_rlc_v2_1 = true;
1176 
1177 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1178 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1179 	adev->gfx.rlc.save_and_restore_offset =
1180 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1181 	adev->gfx.rlc.clear_state_descriptor_offset =
1182 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1183 	adev->gfx.rlc.avail_scratch_ram_locations =
1184 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1185 	adev->gfx.rlc.reg_restore_list_size =
1186 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1187 	adev->gfx.rlc.reg_list_format_start =
1188 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1189 	adev->gfx.rlc.reg_list_format_separate_start =
1190 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1191 	adev->gfx.rlc.starting_offsets_start =
1192 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1193 	adev->gfx.rlc.reg_list_format_size_bytes =
1194 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1195 	adev->gfx.rlc.reg_list_size_bytes =
1196 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1197 	adev->gfx.rlc.register_list_format =
1198 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1199 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1200 	if (!adev->gfx.rlc.register_list_format) {
1201 		err = -ENOMEM;
1202 		goto out;
1203 	}
1204 
1205 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1206 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1207 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1208 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1209 
1210 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1211 
1212 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1213 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1214 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1215 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1216 
1217 	if (adev->gfx.rlc.is_rlc_v2_1)
1218 		gfx_v9_0_init_rlc_ext_microcode(adev);
1219 
1220 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1221 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1222 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1223 		info->fw = adev->gfx.rlc_fw;
1224 		header = (const struct common_firmware_header *)info->fw->data;
1225 		adev->firmware.fw_size +=
1226 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1227 
1228 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1229 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1230 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1231 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1232 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1233 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1234 			info->fw = adev->gfx.rlc_fw;
1235 			adev->firmware.fw_size +=
1236 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1237 
1238 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1239 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1240 			info->fw = adev->gfx.rlc_fw;
1241 			adev->firmware.fw_size +=
1242 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1243 
1244 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1245 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1246 			info->fw = adev->gfx.rlc_fw;
1247 			adev->firmware.fw_size +=
1248 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1249 		}
1250 	}
1251 
1252 out:
1253 	if (err) {
1254 		dev_err(adev->dev,
1255 			"gfx9: Failed to load firmware \"%s\"\n",
1256 			fw_name);
1257 		release_firmware(adev->gfx.rlc_fw);
1258 		adev->gfx.rlc_fw = NULL;
1259 	}
1260 	return err;
1261 }
1262 
1263 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1264 					  const char *chip_name)
1265 {
1266 	char fw_name[30];
1267 	int err;
1268 	struct amdgpu_firmware_info *info = NULL;
1269 	const struct common_firmware_header *header = NULL;
1270 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1271 
1272 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1273 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1274 	if (err)
1275 		goto out;
1276 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1277 	if (err)
1278 		goto out;
1279 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1280 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1281 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1282 
1283 
1284 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1285 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1286 	if (!err) {
1287 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1288 		if (err)
1289 			goto out;
1290 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1291 		adev->gfx.mec2_fw->data;
1292 		adev->gfx.mec2_fw_version =
1293 		le32_to_cpu(cp_hdr->header.ucode_version);
1294 		adev->gfx.mec2_feature_version =
1295 		le32_to_cpu(cp_hdr->ucode_feature_version);
1296 	} else {
1297 		err = 0;
1298 		adev->gfx.mec2_fw = NULL;
1299 	}
1300 
1301 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1302 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1303 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1304 		info->fw = adev->gfx.mec_fw;
1305 		header = (const struct common_firmware_header *)info->fw->data;
1306 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1307 		adev->firmware.fw_size +=
1308 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1309 
1310 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1311 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1312 		info->fw = adev->gfx.mec_fw;
1313 		adev->firmware.fw_size +=
1314 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1315 
1316 		if (adev->gfx.mec2_fw) {
1317 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1318 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1319 			info->fw = adev->gfx.mec2_fw;
1320 			header = (const struct common_firmware_header *)info->fw->data;
1321 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1322 			adev->firmware.fw_size +=
1323 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1324 
1325 			/* TODO: Determine if MEC2 JT FW loading can be removed
1326 			 * for all GFX V9 ASICs and above */
1327 			if (adev->asic_type != CHIP_ARCTURUS) {
1328 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1329 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1330 				info->fw = adev->gfx.mec2_fw;
1331 				adev->firmware.fw_size +=
1332 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1333 					PAGE_SIZE);
1334 			}
1335 		}
1336 	}
1337 
1338 out:
1339 	gfx_v9_0_check_if_need_gfxoff(adev);
1340 	gfx_v9_0_check_fw_write_wait(adev);
1341 	if (err) {
1342 		dev_err(adev->dev,
1343 			"gfx9: Failed to load firmware \"%s\"\n",
1344 			fw_name);
1345 		release_firmware(adev->gfx.mec_fw);
1346 		adev->gfx.mec_fw = NULL;
1347 		release_firmware(adev->gfx.mec2_fw);
1348 		adev->gfx.mec2_fw = NULL;
1349 	}
1350 	return err;
1351 }
1352 
1353 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1354 {
1355 	const char *chip_name;
1356 	int r;
1357 
1358 	DRM_DEBUG("\n");
1359 
1360 	switch (adev->asic_type) {
1361 	case CHIP_VEGA10:
1362 		chip_name = "vega10";
1363 		break;
1364 	case CHIP_VEGA12:
1365 		chip_name = "vega12";
1366 		break;
1367 	case CHIP_VEGA20:
1368 		chip_name = "vega20";
1369 		break;
1370 	case CHIP_RAVEN:
1371 		if (adev->rev_id >= 8)
1372 			chip_name = "raven2";
1373 		else if (adev->pdev->device == 0x15d8)
1374 			chip_name = "picasso";
1375 		else
1376 			chip_name = "raven";
1377 		break;
1378 	case CHIP_ARCTURUS:
1379 		chip_name = "arcturus";
1380 		break;
1381 	case CHIP_RENOIR:
1382 		chip_name = "renoir";
1383 		break;
1384 	default:
1385 		BUG();
1386 	}
1387 
1388 	/* No CPG in Arcturus */
1389 	if (adev->asic_type != CHIP_ARCTURUS) {
1390 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1391 		if (r)
1392 			return r;
1393 	}
1394 
1395 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1396 	if (r)
1397 		return r;
1398 
1399 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1400 	if (r)
1401 		return r;
1402 
1403 	return r;
1404 }
1405 
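/*
 * Size of the clear-state buffer, in dwords: two for the PREAMBLE_CNTL
 * begin packet, three for CONTEXT_CONTROL, two plus the register count
 * for each SET_CONTEXT_REG extent, two for the PREAMBLE_CNTL end packet
 * and two for the final CLEAR_STATE packet.
 */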
1406 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1407 {
1408 	u32 count = 0;
1409 	const struct cs_section_def *sect = NULL;
1410 	const struct cs_extent_def *ext = NULL;
1411 
1412 	/* begin clear state */
1413 	count += 2;
1414 	/* context control state */
1415 	count += 3;
1416 
1417 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1418 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1419 			if (sect->id == SECT_CONTEXT)
1420 				count += 2 + ext->reg_count;
1421 			else
1422 				return 0;
1423 		}
1424 	}
1425 
1426 	/* end clear state */
1427 	count += 2;
1428 	/* clear state */
1429 	count += 2;
1430 
1431 	return count;
1432 }
1433 
1434 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1435 				    volatile u32 *buffer)
1436 {
1437 	u32 count = 0, i;
1438 	const struct cs_section_def *sect = NULL;
1439 	const struct cs_extent_def *ext = NULL;
1440 
1441 	if (adev->gfx.rlc.cs_data == NULL)
1442 		return;
1443 	if (buffer == NULL)
1444 		return;
1445 
1446 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1447 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1448 
1449 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1450 	buffer[count++] = cpu_to_le32(0x80000000);
1451 	buffer[count++] = cpu_to_le32(0x80000000);
1452 
1453 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1454 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1455 			if (sect->id == SECT_CONTEXT) {
1456 				buffer[count++] =
1457 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1458 				buffer[count++] = cpu_to_le32(ext->reg_index -
1459 						PACKET3_SET_CONTEXT_REG_START);
1460 				for (i = 0; i < ext->reg_count; i++)
1461 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1462 			} else {
1463 				return;
1464 			}
1465 		}
1466 	}
1467 
1468 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1469 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1470 
1471 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1472 	buffer[count++] = cpu_to_le32(0);
1473 }
1474 
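/*
 * Build the per-SE/SH always-on CU bitmaps used by RLC power gating: the
 * first two enabled CUs are written to RLC_PG_ALWAYS_ON_CU_MASK, while the
 * first 4 (APU), 8 (Vega12) or 12 (other ASICs) enabled CUs are programmed
 * into RLC_LB_ALWAYS_ACTIVE_CU_MASK and cached in cu_info->ao_cu_bitmap.
 */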
1475 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1476 {
1477 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1478 	uint32_t pg_always_on_cu_num = 2;
1479 	uint32_t always_on_cu_num;
1480 	uint32_t i, j, k;
1481 	uint32_t mask, cu_bitmap, counter;
1482 
1483 	if (adev->flags & AMD_IS_APU)
1484 		always_on_cu_num = 4;
1485 	else if (adev->asic_type == CHIP_VEGA12)
1486 		always_on_cu_num = 8;
1487 	else
1488 		always_on_cu_num = 12;
1489 
1490 	mutex_lock(&adev->grbm_idx_mutex);
1491 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1492 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1493 			mask = 1;
1494 			cu_bitmap = 0;
1495 			counter = 0;
1496 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1497 
1498 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1499 				if (cu_info->bitmap[i][j] & mask) {
1500 					if (counter == pg_always_on_cu_num)
1501 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1502 					if (counter < always_on_cu_num)
1503 						cu_bitmap |= mask;
1504 					else
1505 						break;
1506 					counter++;
1507 				}
1508 				mask <<= 1;
1509 			}
1510 
1511 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1512 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1513 		}
1514 	}
1515 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1516 	mutex_unlock(&adev->grbm_idx_mutex);
1517 }
1518 
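/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU masks
 * for Raven; gfx_v9_4_init_lbpw() below does the same for Vega20 with
 * different threshold values and a larger LB counter maximum.
 */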
1519 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1520 {
1521 	uint32_t data;
1522 
1523 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1524 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1525 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1526 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1527 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1528 
1529 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1530 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1531 
1532 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1533 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1534 
1535 	mutex_lock(&adev->grbm_idx_mutex);
1536 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1537 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1538 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1539 
1540 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1541 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1542 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1543 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1544 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1545 
1546 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1547 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1548 	data &= 0x0000FFFF;
1549 	data |= 0x00C00000;
1550 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1551 
1552 	/*
1553 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1554 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1555 	 */
1556 
1557 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1558 	 * but used for RLC_LB_CNTL configuration */
1559 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1560 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1561 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1562 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1563 	mutex_unlock(&adev->grbm_idx_mutex);
1564 
1565 	gfx_v9_0_init_always_on_cu_mask(adev);
1566 }
1567 
1568 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1569 {
1570 	uint32_t data;
1571 
1572 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1573 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1574 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1575 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1576 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1577 
1578 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1579 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1580 
1581 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1582 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1583 
1584 	mutex_lock(&adev->grbm_idx_mutex);
1585 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1586 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1587 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1588 
1589 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1590 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1591 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1592 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1593 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1594 
1595 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1596 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1597 	data &= 0x0000FFFF;
1598 	data |= 0x00C00000;
1599 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1600 
1601 	/*
1602 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1603 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1604 	 */
1605 
1606 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1607 	 * but used for RLC_LB_CNTL configuration */
1608 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1609 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1610 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1611 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1612 	mutex_unlock(&adev->grbm_idx_mutex);
1613 
1614 	gfx_v9_0_init_always_on_cu_mask(adev);
1615 }
1616 
1617 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1618 {
1619 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1620 }
1621 
1622 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1623 {
1624 	return 5;
1625 }
1626 
1627 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1628 {
1629 	const struct cs_section_def *cs_data;
1630 	int r;
1631 
1632 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1633 
1634 	cs_data = adev->gfx.rlc.cs_data;
1635 
1636 	if (cs_data) {
1637 		/* init clear state block */
1638 		r = amdgpu_gfx_rlc_init_csb(adev);
1639 		if (r)
1640 			return r;
1641 	}
1642 
1643 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1644 		/* TODO: double check the cp_table_size for RV */
1645 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1646 		r = amdgpu_gfx_rlc_init_cpt(adev);
1647 		if (r)
1648 			return r;
1649 	}
1650 
1651 	switch (adev->asic_type) {
1652 	case CHIP_RAVEN:
1653 		gfx_v9_0_init_lbpw(adev);
1654 		break;
1655 	case CHIP_VEGA20:
1656 		gfx_v9_4_init_lbpw(adev);
1657 		break;
1658 	default:
1659 		break;
1660 	}
1661 
1662 	return 0;
1663 }
1664 
1665 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1666 {
1667 	int r;
1668 
1669 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1670 	if (unlikely(r != 0))
1671 		return r;
1672 
1673 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1674 			AMDGPU_GEM_DOMAIN_VRAM);
1675 	if (!r)
1676 		adev->gfx.rlc.clear_state_gpu_addr =
1677 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1678 
1679 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1680 
1681 	return r;
1682 }
1683 
1684 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1685 {
1686 	int r;
1687 
1688 	if (!adev->gfx.rlc.clear_state_obj)
1689 		return;
1690 
1691 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1692 	if (likely(r == 0)) {
1693 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1694 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1695 	}
1696 }
1697 
1698 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1699 {
1700 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1701 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1702 }
1703 
1704 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1705 {
1706 	int r;
1707 	u32 *hpd;
1708 	const __le32 *fw_data;
1709 	unsigned fw_size;
1710 	u32 *fw;
1711 	size_t mec_hpd_size;
1712 
1713 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1714 
1715 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1716 
1717 	/* take ownership of the relevant compute queues */
1718 	amdgpu_gfx_compute_queue_acquire(adev);
1719 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1720 
1721 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1722 				      AMDGPU_GEM_DOMAIN_VRAM,
1723 				      &adev->gfx.mec.hpd_eop_obj,
1724 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1725 				      (void **)&hpd);
1726 	if (r) {
1727 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1728 		gfx_v9_0_mec_fini(adev);
1729 		return r;
1730 	}
1731 
1732 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1733 
1734 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1735 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1736 
1737 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1738 
1739 	fw_data = (const __le32 *)
1740 		(adev->gfx.mec_fw->data +
1741 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1742 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1743 
1744 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1745 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1746 				      &adev->gfx.mec.mec_fw_obj,
1747 				      &adev->gfx.mec.mec_fw_gpu_addr,
1748 				      (void **)&fw);
1749 	if (r) {
1750 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1751 		gfx_v9_0_mec_fini(adev);
1752 		return r;
1753 	}
1754 
1755 	memcpy(fw, fw_data, fw_size);
1756 
1757 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1758 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1759 
1760 	return 0;
1761 }
1762 
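/*
 * Indirect SQ register access: program SQ_IND_INDEX with the wave, SIMD
 * and register address (plus FORCE_READ) and read the value back from
 * SQ_IND_DATA.  wave_read_regs() additionally sets AUTO_INCR so that a
 * run of consecutive registers can be streamed out for SGPR/VGPR dumps.
 */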
1763 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1764 {
1765 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1766 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1767 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1768 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1769 		(SQ_IND_INDEX__FORCE_READ_MASK));
1770 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1771 }
1772 
1773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1774 			   uint32_t wave, uint32_t thread,
1775 			   uint32_t regno, uint32_t num, uint32_t *out)
1776 {
1777 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1778 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1779 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1780 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1781 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1782 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1783 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1784 	while (num--)
1785 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1786 }
1787 
1788 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1789 {
1790 	/* type 1 wave data */
1791 	dst[(*no_fields)++] = 1;
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1799 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1800 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1801 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1802 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1803 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1804 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1805 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1806 }
1807 
1808 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1809 				     uint32_t wave, uint32_t start,
1810 				     uint32_t size, uint32_t *dst)
1811 {
1812 	wave_read_regs(
1813 		adev, simd, wave, 0,
1814 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1815 }
1816 
1817 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1818 				     uint32_t wave, uint32_t thread,
1819 				     uint32_t start, uint32_t size,
1820 				     uint32_t *dst)
1821 {
1822 	wave_read_regs(
1823 		adev, simd, wave, thread,
1824 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1825 }
1826 
1827 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1828 				  u32 me, u32 pipe, u32 q, u32 vm)
1829 {
1830 	soc15_grbm_select(adev, me, pipe, q, vm);
1831 }
1832 
1833 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1834 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1835 	.select_se_sh = &gfx_v9_0_select_se_sh,
1836 	.read_wave_data = &gfx_v9_0_read_wave_data,
1837 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1838 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1839 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1840 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1841 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1842 };
1843 
1844 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1845 {
1846 	u32 gb_addr_config;
1847 	int err;
1848 
1849 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1850 
1851 	switch (adev->asic_type) {
1852 	case CHIP_VEGA10:
1853 		adev->gfx.config.max_hw_contexts = 8;
1854 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1858 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1859 		break;
1860 	case CHIP_VEGA12:
1861 		adev->gfx.config.max_hw_contexts = 8;
1862 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1866 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1867 		DRM_INFO("fix gfx.config for vega12\n");
1868 		break;
1869 	case CHIP_VEGA20:
1870 		adev->gfx.config.max_hw_contexts = 8;
1871 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1875 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1876 		gb_addr_config &= ~0xf3e777ff;
1877 		gb_addr_config |= 0x22014042;
1878 		/* check vbios table if gpu info is not available */
1879 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1880 		if (err)
1881 			return err;
1882 		break;
1883 	case CHIP_RAVEN:
1884 		adev->gfx.config.max_hw_contexts = 8;
1885 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1886 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1887 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1888 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1889 		if (adev->rev_id >= 8)
1890 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1891 		else
1892 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1893 		break;
1894 	case CHIP_ARCTURUS:
1895 		adev->gfx.config.max_hw_contexts = 8;
1896 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1897 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1898 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1899 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1900 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1901 		gb_addr_config &= ~0xf3e777ff;
1902 		gb_addr_config |= 0x22014042;
1903 		break;
1904 	case CHIP_RENOIR:
1905 		adev->gfx.config.max_hw_contexts = 8;
1906 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1907 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1908 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1909 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1910 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1911 		gb_addr_config &= ~0xf3e777ff;
1912 		gb_addr_config |= 0x22010042;
1913 		break;
1914 	default:
1915 		BUG();
1916 		break;
1917 	}
1918 
1919 	adev->gfx.config.gb_addr_config = gb_addr_config;
1920 
1921 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1922 			REG_GET_FIELD(
1923 					adev->gfx.config.gb_addr_config,
1924 					GB_ADDR_CONFIG,
1925 					NUM_PIPES);
1926 
1927 	adev->gfx.config.max_tile_pipes =
1928 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1929 
1930 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1931 			REG_GET_FIELD(
1932 					adev->gfx.config.gb_addr_config,
1933 					GB_ADDR_CONFIG,
1934 					NUM_BANKS);
1935 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1936 			REG_GET_FIELD(
1937 					adev->gfx.config.gb_addr_config,
1938 					GB_ADDR_CONFIG,
1939 					MAX_COMPRESSED_FRAGS);
1940 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1941 			REG_GET_FIELD(
1942 					adev->gfx.config.gb_addr_config,
1943 					GB_ADDR_CONFIG,
1944 					NUM_RB_PER_SE);
1945 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1946 			REG_GET_FIELD(
1947 					adev->gfx.config.gb_addr_config,
1948 					GB_ADDR_CONFIG,
1949 					NUM_SHADER_ENGINES);
1950 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1951 			REG_GET_FIELD(
1952 					adev->gfx.config.gb_addr_config,
1953 					GB_ADDR_CONFIG,
1954 					PIPE_INTERLEAVE_SIZE));
1955 
1956 	return 0;
1957 }
1958 
1959 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1960 				   struct amdgpu_ngg_buf *ngg_buf,
1961 				   int size_se,
1962 				   int default_size_se)
1963 {
1964 	int r;
1965 
1966 	if (size_se < 0) {
1967 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1968 		return -EINVAL;
1969 	}
1970 	size_se = size_se ? size_se : default_size_se;
1971 
1972 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1973 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1974 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1975 				    &ngg_buf->bo,
1976 				    &ngg_buf->gpu_addr,
1977 				    NULL);
1978 	if (r) {
1979 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1980 		return r;
1981 	}
1982 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1983 
1984 	return r;
1985 }
1986 
1987 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1988 {
1989 	int i;
1990 
1991 	for (i = 0; i < NGG_BUF_MAX; i++)
1992 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1993 				      &adev->gfx.ngg.buf[i].gpu_addr,
1994 				      NULL);
1995 
1996 	memset(&adev->gfx.ngg.buf[0], 0,
1997 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1998 
1999 	adev->gfx.ngg.init = false;
2000 
2001 	return 0;
2002 }
2003 
2004 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2005 {
2006 	int r;
2007 
2008 	if (!amdgpu_ngg || adev->gfx.ngg.init)
2009 		return 0;
2010 
2011 	/* GDS reserve memory: 64 bytes alignment */
2012 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2013 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2014 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2015 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2016 
2017 	/* Primitive Buffer */
2018 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2019 				    amdgpu_prim_buf_per_se,
2020 				    64 * 1024);
2021 	if (r) {
2022 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2023 		goto err;
2024 	}
2025 
2026 	/* Position Buffer */
2027 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2028 				    amdgpu_pos_buf_per_se,
2029 				    256 * 1024);
2030 	if (r) {
2031 		dev_err(adev->dev, "Failed to create Position Buffer\n");
2032 		goto err;
2033 	}
2034 
2035 	/* Control Sideband */
2036 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2037 				    amdgpu_cntl_sb_buf_per_se,
2038 				    256);
2039 	if (r) {
2040 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2041 		goto err;
2042 	}
2043 
2044 	/* Parameter Cache, not created by default */
2045 	if (amdgpu_param_buf_per_se <= 0)
2046 		goto out;
2047 
2048 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2049 				    amdgpu_param_buf_per_se,
2050 				    512 * 1024);
2051 	if (r) {
2052 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2053 		goto err;
2054 	}
2055 
2056 out:
2057 	adev->gfx.ngg.init = true;
2058 	return 0;
2059 err:
2060 	gfx_v9_0_ngg_fini(adev);
2061 	return r;
2062 }
2063 
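/*
 * Enable NGG: program the primitive, position, control-sideband and
 * parameter buffer sizes and GPU addresses into the WD registers, then
 * clear the GDS area reserved at init time using a CP DMA_DATA packet
 * submitted on the gfx ring.
 */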
2064 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2065 {
2066 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2067 	int r;
2068 	u32 data, base;
2069 
2070 	if (!amdgpu_ngg)
2071 		return 0;
2072 
2073 	/* Program buffer size */
2074 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2075 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2076 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2077 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2078 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2079 
2080 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2081 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2082 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2083 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2084 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2085 
2086 	/* Program buffer base address */
2087 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2088 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2089 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2090 
2091 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2092 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2093 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2094 
2095 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2096 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2097 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2098 
2099 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2100 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2101 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2102 
2103 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2104 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2105 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2106 
2107 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2108 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2109 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2110 
2111 	/* Clear GDS reserved memory */
2112 	r = amdgpu_ring_alloc(ring, 17);
2113 	if (r) {
2114 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2115 			  ring->name, r);
2116 		return r;
2117 	}
2118 
2119 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2120 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2121 			           (adev->gds.gds_size +
2122 				    adev->gfx.ngg.gds_reserve_size));
2123 
2124 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2125 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2126 				PACKET3_DMA_DATA_DST_SEL(1) |
2127 				PACKET3_DMA_DATA_SRC_SEL(2)));
2128 	amdgpu_ring_write(ring, 0);
2129 	amdgpu_ring_write(ring, 0);
2130 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2131 	amdgpu_ring_write(ring, 0);
2132 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2133 				adev->gfx.ngg.gds_reserve_size);
2134 
2135 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2136 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2137 
2138 	amdgpu_ring_commit(ring);
2139 
2140 	return 0;
2141 }
2142 
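/*
 * Set up one compute ring: MEC0 is exposed as ME1, the doorbell index is
 * derived from mec_ring0 plus the ring id, each ring owns its own
 * GFX9_MEC_HPD_SIZE slice of the EOP buffer, and the EOP interrupt source
 * is selected from the ring's ME/pipe pair.
 */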
2143 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2144 				      int mec, int pipe, int queue)
2145 {
2146 	int r;
2147 	unsigned irq_type;
2148 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2149 
2152 	/* mec0 is me1 */
2153 	ring->me = mec + 1;
2154 	ring->pipe = pipe;
2155 	ring->queue = queue;
2156 
2157 	ring->ring_obj = NULL;
2158 	ring->use_doorbell = true;
2159 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2160 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2161 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2162 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2163 
2164 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2165 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2166 		+ ring->pipe;
2167 
2168 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2169 	r = amdgpu_ring_init(adev, ring, 1024,
2170 			     &adev->gfx.eop_irq, irq_type);
2171 	if (r)
2172 		return r;
2173 
2174 
2175 	return 0;
2176 }
2177 
2178 static int gfx_v9_0_sw_init(void *handle)
2179 {
2180 	int i, j, k, r, ring_id;
2181 	struct amdgpu_ring *ring;
2182 	struct amdgpu_kiq *kiq;
2183 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2184 
2185 	switch (adev->asic_type) {
2186 	case CHIP_VEGA10:
2187 	case CHIP_VEGA12:
2188 	case CHIP_VEGA20:
2189 	case CHIP_RAVEN:
2190 	case CHIP_ARCTURUS:
2191 	case CHIP_RENOIR:
2192 		adev->gfx.mec.num_mec = 2;
2193 		break;
2194 	default:
2195 		adev->gfx.mec.num_mec = 1;
2196 		break;
2197 	}
2198 
2199 	adev->gfx.mec.num_pipe_per_mec = 4;
2200 	adev->gfx.mec.num_queue_per_pipe = 8;
2201 
2202 	/* EOP Event */
2203 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2204 	if (r)
2205 		return r;
2206 
2207 	/* Privileged reg */
2208 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2209 			      &adev->gfx.priv_reg_irq);
2210 	if (r)
2211 		return r;
2212 
2213 	/* Privileged inst */
2214 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2215 			      &adev->gfx.priv_inst_irq);
2216 	if (r)
2217 		return r;
2218 
2219 	/* ECC error */
2220 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2221 			      &adev->gfx.cp_ecc_error_irq);
2222 	if (r)
2223 		return r;
2224 
2225 	/* FUE error */
2226 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2227 			      &adev->gfx.cp_ecc_error_irq);
2228 	if (r)
2229 		return r;
2230 
2231 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2232 
2233 	gfx_v9_0_scratch_init(adev);
2234 
2235 	r = gfx_v9_0_init_microcode(adev);
2236 	if (r) {
2237 		DRM_ERROR("Failed to load gfx firmware!\n");
2238 		return r;
2239 	}
2240 
2241 	r = adev->gfx.rlc.funcs->init(adev);
2242 	if (r) {
2243 		DRM_ERROR("Failed to init rlc BOs!\n");
2244 		return r;
2245 	}
2246 
2247 	r = gfx_v9_0_mec_init(adev);
2248 	if (r) {
2249 		DRM_ERROR("Failed to init MEC BOs!\n");
2250 		return r;
2251 	}
2252 
2253 	/* set up the gfx ring */
2254 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2255 		ring = &adev->gfx.gfx_ring[i];
2256 		ring->ring_obj = NULL;
2257 		if (!i)
2258 			sprintf(ring->name, "gfx");
2259 		else
2260 			sprintf(ring->name, "gfx_%d", i);
2261 		ring->use_doorbell = true;
2262 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2263 		r = amdgpu_ring_init(adev, ring, 1024,
2264 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2265 		if (r)
2266 			return r;
2267 	}
2268 
2269 	/* set up the compute queues - allocate horizontally across pipes */
2270 	ring_id = 0;
2271 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2272 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2273 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2274 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2275 					continue;
2276 
2277 				r = gfx_v9_0_compute_ring_init(adev,
2278 							       ring_id,
2279 							       i, k, j);
2280 				if (r)
2281 					return r;
2282 
2283 				ring_id++;
2284 			}
2285 		}
2286 	}
2287 
2288 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2289 	if (r) {
2290 		DRM_ERROR("Failed to init KIQ BOs!\n");
2291 		return r;
2292 	}
2293 
2294 	kiq = &adev->gfx.kiq;
2295 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2296 	if (r)
2297 		return r;
2298 
2299 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2300 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2301 	if (r)
2302 		return r;
2303 
2304 	adev->gfx.ce_ram_size = 0x8000;
2305 
2306 	r = gfx_v9_0_gpu_early_init(adev);
2307 	if (r)
2308 		return r;
2309 
2310 	r = gfx_v9_0_ngg_init(adev);
2311 	if (r)
2312 		return r;
2313 
2314 	return 0;
2315 }
2316 
2317 
2318 static int gfx_v9_0_sw_fini(void *handle)
2319 {
2320 	int i;
2321 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2322 
2323 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2324 			adev->gfx.ras_if) {
2325 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2326 		struct ras_ih_if ih_info = {
2327 			.head = *ras_if,
2328 		};
2329 
2330 		amdgpu_ras_debugfs_remove(adev, ras_if);
2331 		amdgpu_ras_sysfs_remove(adev, ras_if);
2332 		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2333 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2334 		kfree(ras_if);
2335 	}
2336 
2337 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2338 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2339 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2340 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2341 
2342 	amdgpu_gfx_mqd_sw_fini(adev);
2343 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2344 	amdgpu_gfx_kiq_fini(adev);
2345 
2346 	gfx_v9_0_mec_fini(adev);
2347 	gfx_v9_0_ngg_fini(adev);
2348 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2349 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2350 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2351 				&adev->gfx.rlc.cp_table_gpu_addr,
2352 				(void **)&adev->gfx.rlc.cp_table_ptr);
2353 	}
2354 	gfx_v9_0_free_microcode(adev);
2355 
2356 	return 0;
2357 }
2358 
2359 
2360 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2361 {
2362 	/* TODO */
2363 }
2364 
2365 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2366 {
2367 	u32 data;
2368 
2369 	if (instance == 0xffffffff)
2370 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2371 	else
2372 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2373 
2374 	if (se_num == 0xffffffff)
2375 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2376 	else
2377 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2378 
2379 	if (sh_num == 0xffffffff)
2380 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2381 	else
2382 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2383 
2384 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2385 }
2386 
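/*
 * Return the bitmap of active render backends for the SE/SH currently
 * selected through GRBM_GFX_INDEX, derived from the CC and GC_USER
 * RB_BACKEND_DISABLE registers.
 */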
2387 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2388 {
2389 	u32 data, mask;
2390 
2391 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2392 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2393 
2394 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2395 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2396 
2397 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2398 					 adev->gfx.config.max_sh_per_se);
2399 
2400 	return (~data) & mask;
2401 }
2402 
2403 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2404 {
2405 	int i, j;
2406 	u32 data;
2407 	u32 active_rbs = 0;
2408 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2409 					adev->gfx.config.max_sh_per_se;
2410 
2411 	mutex_lock(&adev->grbm_idx_mutex);
2412 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2413 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2414 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2415 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2416 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2417 					       rb_bitmap_width_per_sh);
2418 		}
2419 	}
2420 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2421 	mutex_unlock(&adev->grbm_idx_mutex);
2422 
2423 	adev->gfx.config.backend_enable_mask = active_rbs;
2424 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2425 }
2426 
2427 #define DEFAULT_SH_MEM_BASES	(0x6000)
2428 #define FIRST_COMPUTE_VMID	(8)
2429 #define LAST_COMPUTE_VMID	(16)
2430 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2431 {
2432 	int i;
2433 	uint32_t sh_mem_config;
2434 	uint32_t sh_mem_bases;
2435 
2436 	/*
2437 	 * Configure apertures:
2438 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2439 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2440 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2441 	 */
2442 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2443 
2444 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2445 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2446 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2447 
2448 	mutex_lock(&adev->srbm_mutex);
2449 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2450 		soc15_grbm_select(adev, 0, 0, 0, i);
2451 		/* CP and shaders */
2452 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2453 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2454 	}
2455 	soc15_grbm_select(adev, 0, 0, 0, 0);
2456 	mutex_unlock(&adev->srbm_mutex);
2457 
2458 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2459 	 * access. These should be enabled by FW for target VMIDs. */
2460 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2461 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2462 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2463 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2464 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2465 	}
2466 }
2467 
2468 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2469 {
2470 	int vmid;
2471 
2472 	/*
2473 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2474 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2475 	 * the driver can enable them for graphics. VMID0 should maintain
2476 	 * access so that HWS firmware can save/restore entries.
2477 	 */
2478 	for (vmid = 1; vmid < 16; vmid++) {
2479 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2480 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2481 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2482 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2483 	}
2484 }
2485 
2486 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2487 {
2488 	u32 tmp;
2489 	int i;
2490 
2491 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2492 
2493 	gfx_v9_0_tiling_mode_table_init(adev);
2494 
2495 	gfx_v9_0_setup_rb(adev);
2496 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2497 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2498 
2499 	/* XXX SH_MEM regs */
2500 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2501 	mutex_lock(&adev->srbm_mutex);
2502 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2503 		soc15_grbm_select(adev, 0, 0, 0, i);
2504 		/* CP and shaders */
2505 		if (i == 0) {
2506 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2507 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2508 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2509 					    !!amdgpu_noretry);
2510 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2511 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2512 		} else {
2513 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2514 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2515 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2516 					    !!amdgpu_noretry);
2517 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2518 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2519 				(adev->gmc.private_aperture_start >> 48));
2520 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2521 				(adev->gmc.shared_aperture_start >> 48));
2522 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2523 		}
2524 	}
2525 	soc15_grbm_select(adev, 0, 0, 0, 0);
2526 
2527 	mutex_unlock(&adev->srbm_mutex);
2528 
2529 	gfx_v9_0_init_compute_vmid(adev);
2530 	gfx_v9_0_init_gds_vmid(adev);
2531 }
2532 
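/*
 * Wait for the RLC serdes to go idle: first the per-CU masters for every
 * SE/SH, then the non-CU masters, each polled for up to adev->usec_timeout
 * iterations with a 1us delay.
 */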
2533 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2534 {
2535 	u32 i, j, k;
2536 	u32 mask;
2537 
2538 	mutex_lock(&adev->grbm_idx_mutex);
2539 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2540 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2541 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2542 			for (k = 0; k < adev->usec_timeout; k++) {
2543 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2544 					break;
2545 				udelay(1);
2546 			}
2547 			if (k == adev->usec_timeout) {
2548 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2549 						      0xffffffff, 0xffffffff);
2550 				mutex_unlock(&adev->grbm_idx_mutex);
2551 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2552 					 i, j);
2553 				return;
2554 			}
2555 		}
2556 	}
2557 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2558 	mutex_unlock(&adev->grbm_idx_mutex);
2559 
2560 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2561 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2562 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2563 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2564 	for (k = 0; k < adev->usec_timeout; k++) {
2565 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2566 			break;
2567 		udelay(1);
2568 	}
2569 }
2570 
2571 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2572 					       bool enable)
2573 {
2574 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2575 
2576 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2577 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2578 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2579 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2580 
2581 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2582 }
2583 
2584 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2585 {
2586 	/* csib */
2587 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2588 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2589 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2590 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2591 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2592 			adev->gfx.rlc.clear_state_size);
2593 }
2594 
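/*
 * Walk the indirect portion of the RLC register list format: each block
 * starts at indirect_offset and is terminated by a 0xFFFFFFFF marker; the
 * block's starting offset is recorded in indirect_start_offsets[] and every
 * indirect register it references is collected into unique_indirect_regs[].
 */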
2595 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2596 				int indirect_offset,
2597 				int list_size,
2598 				int *unique_indirect_regs,
2599 				int unique_indirect_reg_count,
2600 				int *indirect_start_offsets,
2601 				int *indirect_start_offsets_count,
2602 				int max_start_offsets_count)
2603 {
2604 	int idx;
2605 
2606 	for (; indirect_offset < list_size; indirect_offset++) {
2607 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2608 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2609 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2610 
2611 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2612 			indirect_offset += 2;
2613 
2614 			/* look for the matching index */
2615 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2616 				if (unique_indirect_regs[idx] ==
2617 					register_list_format[indirect_offset] ||
2618 					!unique_indirect_regs[idx])
2619 					break;
2620 			}
2621 
2622 			BUG_ON(idx >= unique_indirect_reg_count);
2623 
2624 			if (!unique_indirect_regs[idx])
2625 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2626 
2627 			indirect_offset++;
2628 		}
2629 	}
2630 }
2631 
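/*
 * Upload the RLC save/restore list: the direct register restore table is
 * written to SRM ARAM, the register list format (direct and indirect
 * portions) to GPM scratch along with the list size and starting offsets,
 * and the unique indirect registers to the SRM index control registers.
 */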
2632 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2633 {
2634 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2635 	int unique_indirect_reg_count = 0;
2636 
2637 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2638 	int indirect_start_offsets_count = 0;
2639 
2640 	int list_size = 0;
2641 	int i = 0, j = 0;
2642 	u32 tmp = 0;
2643 
2644 	u32 *register_list_format =
2645 		kmemdup(adev->gfx.rlc.register_list_format,
2646 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2647 	if (!register_list_format)
2648 		return -ENOMEM;
2649 
2650 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2651 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2652 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2653 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2654 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2655 				    unique_indirect_regs,
2656 				    unique_indirect_reg_count,
2657 				    indirect_start_offsets,
2658 				    &indirect_start_offsets_count,
2659 				    ARRAY_SIZE(indirect_start_offsets));
2660 
2661 	/* enable auto inc in case it is disabled */
2662 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2663 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2664 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2665 
2666 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2667 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2668 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2669 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2670 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2671 			adev->gfx.rlc.register_restore[i]);
2672 
2673 	/* load indirect register */
2674 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2675 		adev->gfx.rlc.reg_list_format_start);
2676 
2677 	/* direct register portion */
2678 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2679 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2680 			register_list_format[i]);
2681 
2682 	/* indirect register portion */
2683 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2684 		if (register_list_format[i] == 0xFFFFFFFF) {
2685 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2686 			continue;
2687 		}
2688 
2689 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2690 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2691 
2692 		for (j = 0; j < unique_indirect_reg_count; j++) {
2693 			if (register_list_format[i] == unique_indirect_regs[j]) {
2694 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2695 				break;
2696 			}
2697 		}
2698 
2699 		BUG_ON(j >= unique_indirect_reg_count);
2700 
2701 		i++;
2702 	}
2703 
2704 	/* set save/restore list size */
2705 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2706 	list_size = list_size >> 1;
2707 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2708 		adev->gfx.rlc.reg_restore_list_size);
2709 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2710 
2711 	/* write the starting offsets to RLC scratch ram */
2712 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2713 		adev->gfx.rlc.starting_offsets_start);
2714 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2715 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2716 		       indirect_start_offsets[i]);
2717 
2718 	/* load unique indirect regs */
2719 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2720 		if (unique_indirect_regs[i] != 0) {
2721 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2722 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2723 			       unique_indirect_regs[i] & 0x3FFFF);
2724 
2725 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2726 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2727 			       unique_indirect_regs[i] >> 20);
2728 		}
2729 	}
2730 
2731 	kfree(register_list_format);
2732 	return 0;
2733 }
2734 
2735 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2736 {
2737 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2738 }
2739 
2740 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2741 					     bool enable)
2742 {
2743 	uint32_t data = 0;
2744 	uint32_t default_data = 0;
2745 
2746 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2747 	if (enable) {
2748 		/* enable GFXIP control over CGPG */
2749 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2750 		if (default_data != data)
2751 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2752 
2753 		/* update status */
2754 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2755 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2756 		if (default_data != data)
2757 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2758 	} else {
2759 		/* restore GFXIP control over CGPG */
2760 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2761 		if (default_data != data)
2762 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2763 	}
2764 }
2765 
2766 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2767 {
2768 	uint32_t data = 0;
2769 
2770 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2771 			      AMD_PG_SUPPORT_GFX_SMG |
2772 			      AMD_PG_SUPPORT_GFX_DMG)) {
2773 		/* init IDLE_POLL_COUNT = 60 */
2774 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2775 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2776 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2777 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2778 
2779 		/* init RLC PG Delay */
2780 		data = 0;
2781 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2782 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2783 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2784 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2785 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2786 
2787 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2788 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2789 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2790 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2791 
2792 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2793 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2794 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2795 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2796 
2797 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2798 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2799 
2800 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2801 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2802 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2803 
2804 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2805 	}
2806 }
2807 
2808 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2809 						bool enable)
2810 {
2811 	uint32_t data = 0;
2812 	uint32_t default_data = 0;
2813 
2814 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2815 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2816 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2817 			     enable ? 1 : 0);
2818 	if (default_data != data)
2819 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2820 }
2821 
2822 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2823 						bool enable)
2824 {
2825 	uint32_t data = 0;
2826 	uint32_t default_data = 0;
2827 
2828 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2829 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2830 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2831 			     enable ? 1 : 0);
2832 	if (default_data != data)
2833 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2834 }
2835 
2836 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2837 					bool enable)
2838 {
2839 	uint32_t data = 0;
2840 	uint32_t default_data = 0;
2841 
2842 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2843 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2844 			     CP_PG_DISABLE,
2845 			     enable ? 0 : 1);
2846 	if (default_data != data)
2847 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2848 }
2849 
2850 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2851 						bool enable)
2852 {
2853 	uint32_t data, default_data;
2854 
2855 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2856 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2857 			     GFX_POWER_GATING_ENABLE,
2858 			     enable ? 1 : 0);
2859 	if (default_data != data)
2860 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2861 }
2862 
2863 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2864 						bool enable)
2865 {
2866 	uint32_t data, default_data;
2867 
2868 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870 			     GFX_PIPELINE_PG_ENABLE,
2871 			     enable ? 1 : 0);
2872 	if (default_data != data)
2873 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 
2875 	if (!enable)
2876 		/* read any GFX register to wake up GFX */
2877 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2878 }
2879 
2880 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2881 						       bool enable)
2882 {
2883 	uint32_t data, default_data;
2884 
2885 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2886 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2887 			     STATIC_PER_CU_PG_ENABLE,
2888 			     enable ? 1 : 0);
2889 	if (default_data != data)
2890 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2891 }
2892 
2893 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2894 						bool enable)
2895 {
2896 	uint32_t data, default_data;
2897 
2898 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900 			     DYN_PER_CU_PG_ENABLE,
2901 			     enable ? 1 : 0);
2902 	if (default_data != data)
2903 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905 
2906 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2907 {
2908 	gfx_v9_0_init_csb(adev);
2909 
2910 	/*
2911 	 * The RLC save/restore list is available from RLC firmware v2.1
2912 	 * onward and is required by the gfxoff feature.
2913 	 */
2914 	if (adev->gfx.rlc.is_rlc_v2_1) {
2915 		gfx_v9_1_init_rlc_save_restore_list(adev);
2916 		gfx_v9_0_enable_save_restore_machine(adev);
2917 	}
2918 
2919 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2920 			      AMD_PG_SUPPORT_GFX_SMG |
2921 			      AMD_PG_SUPPORT_GFX_DMG |
2922 			      AMD_PG_SUPPORT_CP |
2923 			      AMD_PG_SUPPORT_GDS |
2924 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2925 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2926 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2927 		gfx_v9_0_init_gfx_power_gating(adev);
2928 	}
2929 }
2930 
2931 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2932 {
2933 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2934 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2935 	gfx_v9_0_wait_for_rlc_serdes(adev);
2936 }
2937 
2938 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2939 {
2940 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2941 	udelay(50);
2942 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2943 	udelay(50);
2944 }
2945 
2946 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2947 {
2948 #ifdef AMDGPU_RLC_DEBUG_RETRY
2949 	u32 rlc_ucode_ver;
2950 #endif
2951 
2952 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2953 	udelay(50);
2954 
2955 	/* APUs (e.g. Carrizo) enable the CP interrupt after the CP is initialized */
2956 	if (!(adev->flags & AMD_IS_APU)) {
2957 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2958 		udelay(50);
2959 	}
2960 
2961 #ifdef AMDGPU_RLC_DEBUG_RETRY
2962 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2963 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2964 	if (rlc_ucode_ver == 0x108) {
2965 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2966 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2967 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2968 		 * default is 0x9C4 to create a 100us interval */
2969 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2970 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2971 		 * to disable the page fault retry interrupts, default is
2972 		 * 0x100 (256) */
2973 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2974 	}
2975 #endif
2976 }
2977 
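/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode image into the
 * RLC GPM ucode registers one dword at a time.
 */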
2978 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2979 {
2980 	const struct rlc_firmware_header_v2_0 *hdr;
2981 	const __le32 *fw_data;
2982 	unsigned i, fw_size;
2983 
2984 	if (!adev->gfx.rlc_fw)
2985 		return -EINVAL;
2986 
2987 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2988 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2989 
2990 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2991 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2992 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2993 
2994 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2995 			RLCG_UCODE_LOADING_START_ADDRESS);
2996 	for (i = 0; i < fw_size; i++)
2997 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2998 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2999 
3000 	return 0;
3001 }
3002 
3003 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3004 {
3005 	int r;
3006 
3007 	if (amdgpu_sriov_vf(adev)) {
3008 		gfx_v9_0_init_csb(adev);
3009 		return 0;
3010 	}
3011 
3012 	adev->gfx.rlc.funcs->stop(adev);
3013 
3014 	/* disable CG */
3015 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3016 
3017 	gfx_v9_0_init_pg(adev);
3018 
3019 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3020 		/* legacy rlc firmware loading */
3021 		r = gfx_v9_0_rlc_load_microcode(adev);
3022 		if (r)
3023 			return r;
3024 	}
3025 
3026 	switch (adev->asic_type) {
3027 	case CHIP_RAVEN:
3028 		if (amdgpu_lbpw == 0)
3029 			gfx_v9_0_enable_lbpw(adev, false);
3030 		else
3031 			gfx_v9_0_enable_lbpw(adev, true);
3032 		break;
3033 	case CHIP_VEGA20:
3034 		if (amdgpu_lbpw > 0)
3035 			gfx_v9_0_enable_lbpw(adev, true);
3036 		else
3037 			gfx_v9_0_enable_lbpw(adev, false);
3038 		break;
3039 	default:
3040 		break;
3041 	}
3042 
3043 	adev->gfx.rlc.funcs->start(adev);
3044 
3045 	return 0;
3046 }
3047 
3048 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3049 {
3050 	int i;
3051 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3052 
3053 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3054 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3055 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3056 	if (!enable) {
3057 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3058 			adev->gfx.gfx_ring[i].sched.ready = false;
3059 	}
3060 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3061 	udelay(50);
3062 }
3063 
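/*
 * Legacy (non-PSP) CP gfx microcode load: halt the CP, then write the
 * PFP, CE and ME ucode images into their respective ucode data registers.
 */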
3064 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3065 {
3066 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3067 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3068 	const struct gfx_firmware_header_v1_0 *me_hdr;
3069 	const __le32 *fw_data;
3070 	unsigned i, fw_size;
3071 
3072 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3073 		return -EINVAL;
3074 
3075 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3076 		adev->gfx.pfp_fw->data;
3077 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3078 		adev->gfx.ce_fw->data;
3079 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3080 		adev->gfx.me_fw->data;
3081 
3082 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3083 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3084 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3085 
3086 	gfx_v9_0_cp_gfx_enable(adev, false);
3087 
3088 	/* PFP */
3089 	fw_data = (const __le32 *)
3090 		(adev->gfx.pfp_fw->data +
3091 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3092 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3093 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3094 	for (i = 0; i < fw_size; i++)
3095 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3096 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3097 
3098 	/* CE */
3099 	fw_data = (const __le32 *)
3100 		(adev->gfx.ce_fw->data +
3101 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3102 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3103 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3104 	for (i = 0; i < fw_size; i++)
3105 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3106 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3107 
3108 	/* ME */
3109 	fw_data = (const __le32 *)
3110 		(adev->gfx.me_fw->data +
3111 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3112 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3113 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3114 	for (i = 0; i < fw_size; i++)
3115 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3116 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3117 
3118 	return 0;
3119 }
3120 
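/*
 * Bring up the CP gfx engine: program the context limits, un-halt the
 * CP and emit the clear-state preamble on the gfx ring.
 */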
3121 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3122 {
3123 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3124 	const struct cs_section_def *sect = NULL;
3125 	const struct cs_extent_def *ext = NULL;
3126 	int r, i, tmp;
3127 
3128 	/* init the CP */
3129 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3130 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3131 
3132 	gfx_v9_0_cp_gfx_enable(adev, true);
3133 
3134 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3135 	if (r) {
3136 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3137 		return r;
3138 	}
3139 
3140 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3141 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3142 
3143 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3144 	amdgpu_ring_write(ring, 0x80000000);
3145 	amdgpu_ring_write(ring, 0x80000000);
3146 
3147 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3148 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3149 			if (sect->id == SECT_CONTEXT) {
3150 				amdgpu_ring_write(ring,
3151 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3152 					       ext->reg_count));
3153 				amdgpu_ring_write(ring,
3154 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3155 				for (i = 0; i < ext->reg_count; i++)
3156 					amdgpu_ring_write(ring, ext->extent[i]);
3157 			}
3158 		}
3159 	}
3160 
3161 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3162 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3163 
3164 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3165 	amdgpu_ring_write(ring, 0);
3166 
3167 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3168 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3169 	amdgpu_ring_write(ring, 0x8000);
3170 	amdgpu_ring_write(ring, 0x8000);
3171 
3172 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3173 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3174 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3175 	amdgpu_ring_write(ring, tmp);
3176 	amdgpu_ring_write(ring, 0);
3177 
3178 	amdgpu_ring_commit(ring);
3179 
3180 	return 0;
3181 }
3182 
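/*
 * Program the gfx ring buffer: size, read/write pointer writeback
 * addresses, ring base and doorbell range, then start the ring.
 */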
3183 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3184 {
3185 	struct amdgpu_ring *ring;
3186 	u32 tmp;
3187 	u32 rb_bufsz;
3188 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3189 
3190 	/* Set the write pointer delay */
3191 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3192 
3193 	/* set the RB to use vmid 0 */
3194 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3195 
3196 	/* Set ring buffer size */
3197 	ring = &adev->gfx.gfx_ring[0];
3198 	rb_bufsz = order_base_2(ring->ring_size / 8);
3199 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3200 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3201 #ifdef __BIG_ENDIAN
3202 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3203 #endif
3204 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3205 
3206 	/* Initialize the ring buffer's write pointers */
3207 	ring->wptr = 0;
3208 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3209 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3210 
3211 	/* set the wb address whether it's enabled or not */
3212 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3213 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3214 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3215 
3216 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3217 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3218 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3219 
3220 	mdelay(1);
3221 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3222 
3223 	rb_addr = ring->gpu_addr >> 8;
3224 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3225 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3226 
3227 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3228 	if (ring->use_doorbell) {
3229 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3230 				    DOORBELL_OFFSET, ring->doorbell_index);
3231 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3232 				    DOORBELL_EN, 1);
3233 	} else {
3234 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3235 	}
3236 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3237 
3238 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3239 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3240 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3241 
3242 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3243 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3244 
3245 
3246 	/* start the ring */
3247 	gfx_v9_0_cp_gfx_start(adev);
3248 	ring->sched.ready = true;
3249 
3250 	return 0;
3251 }
3252 
3253 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3254 {
3255 	int i;
3256 
3257 	if (enable) {
3258 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3259 	} else {
3260 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3261 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3262 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3263 			adev->gfx.compute_ring[i].sched.ready = false;
3264 		adev->gfx.kiq.ring.sched.ready = false;
3265 	}
3266 	udelay(50);
3267 }
3268 
3269 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3270 {
3271 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3272 	const __le32 *fw_data;
3273 	unsigned i;
3274 	u32 tmp;
3275 
3276 	if (!adev->gfx.mec_fw)
3277 		return -EINVAL;
3278 
3279 	gfx_v9_0_cp_compute_enable(adev, false);
3280 
3281 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3282 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3283 
3284 	fw_data = (const __le32 *)
3285 		(adev->gfx.mec_fw->data +
3286 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3287 	tmp = 0;
3288 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3289 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3290 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3291 
3292 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3293 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3294 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3295 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3296 
3297 	/* MEC1 */
3298 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3299 			 mec_hdr->jt_offset);
3300 	for (i = 0; i < mec_hdr->jt_size; i++)
3301 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3302 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3303 
3304 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3305 			adev->gfx.mec_fw_version);
3306 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3307 
3308 	return 0;
3309 }
3310 
3311 /* KIQ functions */
3312 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3313 {
3314 	uint32_t tmp;
3315 	struct amdgpu_device *adev = ring->adev;
3316 
3317 	/* tell RLC which is KIQ queue */
3318 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3319 	tmp &= 0xffffff00;
3320 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3321 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3322 	tmp |= 0x80;
3323 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3324 }
3325 
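/*
 * Map all compute queues through the KIQ: a SET_RESOURCES packet
 * publishes the usable queue mask, then one MAP_QUEUES packet per
 * compute ring points the CP at that ring's MQD and wptr address.
 */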
3326 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3327 {
3328 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3329 	uint64_t queue_mask = 0;
3330 	int r, i;
3331 
3332 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3333 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3334 			continue;
3335 
3336 		/* This situation may be hit in the future if a new HW
3337 		 * generation exposes more than 64 queues. If so, the
3338 		 * definition of queue_mask needs updating */
3339 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3340 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3341 			break;
3342 		}
3343 
3344 		queue_mask |= (1ull << i);
3345 	}
3346 
3347 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3348 	if (r) {
3349 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3350 		return r;
3351 	}
3352 
3353 	/* set resources */
3354 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3355 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3356 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3357 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3358 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3359 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3360 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3361 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3362 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3363 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3364 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3365 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3366 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3367 
3368 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3369 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3370 		amdgpu_ring_write(kiq_ring,
3371 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3372 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3373 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3374 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3375 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3376 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3377 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3378 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3379 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3380 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3381 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3382 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3383 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3384 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3385 	}
3386 
3387 	r = amdgpu_ring_test_helper(kiq_ring);
3388 	if (r)
3389 		DRM_ERROR("KCQ enable failed\n");
3390 
3391 	return r;
3392 }
3393 
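/*
 * Fill the memory queue descriptor (MQD) for a compute ring: thread
 * management masks, EOP buffer, doorbell, ring base/size and the
 * rptr/wptr writeback addresses the CP uses once the queue is mapped.
 */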
3394 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3395 {
3396 	struct amdgpu_device *adev = ring->adev;
3397 	struct v9_mqd *mqd = ring->mqd_ptr;
3398 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3399 	uint32_t tmp;
3400 
3401 	mqd->header = 0xC0310800;
3402 	mqd->compute_pipelinestat_enable = 0x00000001;
3403 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3404 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3405 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3406 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3407 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3408 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3409 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3410 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3411 	mqd->compute_misc_reserved = 0x00000003;
3412 
3413 	mqd->dynamic_cu_mask_addr_lo =
3414 		lower_32_bits(ring->mqd_gpu_addr
3415 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3416 	mqd->dynamic_cu_mask_addr_hi =
3417 		upper_32_bits(ring->mqd_gpu_addr
3418 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3419 
3420 	eop_base_addr = ring->eop_gpu_addr >> 8;
3421 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3422 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3423 
3424 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3425 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3426 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3427 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3428 
3429 	mqd->cp_hqd_eop_control = tmp;
3430 
3431 	/* enable doorbell? */
3432 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3433 
3434 	if (ring->use_doorbell) {
3435 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3436 				    DOORBELL_OFFSET, ring->doorbell_index);
3437 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438 				    DOORBELL_EN, 1);
3439 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3440 				    DOORBELL_SOURCE, 0);
3441 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3442 				    DOORBELL_HIT, 0);
3443 	} else {
3444 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3445 					 DOORBELL_EN, 0);
3446 	}
3447 
3448 	mqd->cp_hqd_pq_doorbell_control = tmp;
3449 
3450 	/* disable the queue if it's active */
3451 	ring->wptr = 0;
3452 	mqd->cp_hqd_dequeue_request = 0;
3453 	mqd->cp_hqd_pq_rptr = 0;
3454 	mqd->cp_hqd_pq_wptr_lo = 0;
3455 	mqd->cp_hqd_pq_wptr_hi = 0;
3456 
3457 	/* set the pointer to the MQD */
3458 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3459 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3460 
3461 	/* set MQD vmid to 0 */
3462 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3463 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3464 	mqd->cp_mqd_control = tmp;
3465 
3466 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3467 	hqd_gpu_addr = ring->gpu_addr >> 8;
3468 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3469 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3470 
3471 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3472 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3473 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3474 			    (order_base_2(ring->ring_size / 4) - 1));
3475 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3476 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3477 #ifdef __BIG_ENDIAN
3478 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3479 #endif
3480 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3481 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3482 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3483 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3484 	mqd->cp_hqd_pq_control = tmp;
3485 
3486 	/* set the wb address whether it's enabled or not */
3487 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3488 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3489 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3490 		upper_32_bits(wb_gpu_addr) & 0xffff;
3491 
3492 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3493 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3494 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3495 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3496 
3497 	tmp = 0;
3498 	/* enable the doorbell if requested */
3499 	if (ring->use_doorbell) {
3500 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3501 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502 				DOORBELL_OFFSET, ring->doorbell_index);
3503 
3504 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505 					 DOORBELL_EN, 1);
3506 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507 					 DOORBELL_SOURCE, 0);
3508 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509 					 DOORBELL_HIT, 0);
3510 	}
3511 
3512 	mqd->cp_hqd_pq_doorbell_control = tmp;
3513 
3514 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3515 	ring->wptr = 0;
3516 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3517 
3518 	/* set the vmid for the queue */
3519 	mqd->cp_hqd_vmid = 0;
3520 
3521 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3522 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3523 	mqd->cp_hqd_persistent_state = tmp;
3524 
3525 	/* set MIN_IB_AVAIL_SIZE */
3526 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3527 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3528 	mqd->cp_hqd_ib_control = tmp;
3529 
3530 	/* activate the queue */
3531 	mqd->cp_hqd_active = 1;
3532 
3533 	return 0;
3534 }
3535 
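/*
 * Program the hardware queue descriptor (HQD) registers directly from
 * the MQD contents. The caller selects the target queue with
 * soc15_grbm_select() while holding srbm_mutex.
 */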
3536 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3537 {
3538 	struct amdgpu_device *adev = ring->adev;
3539 	struct v9_mqd *mqd = ring->mqd_ptr;
3540 	int j;
3541 
3542 	/* disable wptr polling */
3543 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3544 
3545 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3546 	       mqd->cp_hqd_eop_base_addr_lo);
3547 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3548 	       mqd->cp_hqd_eop_base_addr_hi);
3549 
3550 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3551 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3552 	       mqd->cp_hqd_eop_control);
3553 
3554 	/* enable doorbell? */
3555 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3556 	       mqd->cp_hqd_pq_doorbell_control);
3557 
3558 	/* disable the queue if it's active */
3559 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3560 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3561 		for (j = 0; j < adev->usec_timeout; j++) {
3562 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3563 				break;
3564 			udelay(1);
3565 		}
3566 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3567 		       mqd->cp_hqd_dequeue_request);
3568 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3569 		       mqd->cp_hqd_pq_rptr);
3570 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3571 		       mqd->cp_hqd_pq_wptr_lo);
3572 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3573 		       mqd->cp_hqd_pq_wptr_hi);
3574 	}
3575 
3576 	/* set the pointer to the MQD */
3577 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3578 	       mqd->cp_mqd_base_addr_lo);
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3580 	       mqd->cp_mqd_base_addr_hi);
3581 
3582 	/* set MQD vmid to 0 */
3583 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3584 	       mqd->cp_mqd_control);
3585 
3586 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3587 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3588 	       mqd->cp_hqd_pq_base_lo);
3589 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3590 	       mqd->cp_hqd_pq_base_hi);
3591 
3592 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3593 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3594 	       mqd->cp_hqd_pq_control);
3595 
3596 	/* set the wb address whether it's enabled or not */
3597 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3598 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3599 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3600 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3601 
3602 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3603 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3604 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3605 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3606 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3607 
3608 	/* enable the doorbell if requested */
3609 	if (ring->use_doorbell) {
3610 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3611 					(adev->doorbell_index.kiq * 2) << 2);
3612 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3613 					(adev->doorbell_index.userqueue_end * 2) << 2);
3614 	}
3615 
3616 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3617 	       mqd->cp_hqd_pq_doorbell_control);
3618 
3619 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3621 	       mqd->cp_hqd_pq_wptr_lo);
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3623 	       mqd->cp_hqd_pq_wptr_hi);
3624 
3625 	/* set the vmid for the queue */
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3627 
3628 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3629 	       mqd->cp_hqd_persistent_state);
3630 
3631 	/* activate the queue */
3632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3633 	       mqd->cp_hqd_active);
3634 
3635 	if (ring->use_doorbell)
3636 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3637 
3638 	return 0;
3639 }
3640 
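/*
 * Quiesce the currently selected hardware queue: request a dequeue,
 * wait for the HQD to go inactive (forcing it off on timeout) and then
 * clear out the HQD registers.
 */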
3641 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3642 {
3643 	struct amdgpu_device *adev = ring->adev;
3644 	int j;
3645 
3646 	/* disable the queue if it's active */
3647 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3648 
3649 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3650 
3651 		for (j = 0; j < adev->usec_timeout; j++) {
3652 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3653 				break;
3654 			udelay(1);
3655 		}
3656 
3657 		if (j == adev->usec_timeout) {
3658 			DRM_DEBUG("KIQ dequeue request failed.\n");
3659 
3660 			/* Manual disable if dequeue request times out */
3661 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3662 		}
3663 
3664 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3665 		      0);
3666 	}
3667 
3668 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3670 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3672 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3674 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3675 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3676 
3677 	return 0;
3678 }
3679 
3680 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3681 {
3682 	struct amdgpu_device *adev = ring->adev;
3683 	struct v9_mqd *mqd = ring->mqd_ptr;
3684 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3685 
3686 	gfx_v9_0_kiq_setting(ring);
3687 
3688 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3689 		/* reset MQD to a clean status */
3690 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3691 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3692 
3693 		/* reset ring buffer */
3694 		ring->wptr = 0;
3695 		amdgpu_ring_clear_ring(ring);
3696 
3697 		mutex_lock(&adev->srbm_mutex);
3698 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3699 		gfx_v9_0_kiq_init_register(ring);
3700 		soc15_grbm_select(adev, 0, 0, 0, 0);
3701 		mutex_unlock(&adev->srbm_mutex);
3702 	} else {
3703 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3704 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3705 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3706 		mutex_lock(&adev->srbm_mutex);
3707 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3708 		gfx_v9_0_mqd_init(ring);
3709 		gfx_v9_0_kiq_init_register(ring);
3710 		soc15_grbm_select(adev, 0, 0, 0, 0);
3711 		mutex_unlock(&adev->srbm_mutex);
3712 
3713 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3714 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3715 	}
3716 
3717 	return 0;
3718 }
3719 
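/*
 * Initialize the MQD for one compute ring on first bring-up and keep a
 * CPU-side backup, or restore the backup and reset the ring when coming
 * back from a GPU reset.
 */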
3720 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3721 {
3722 	struct amdgpu_device *adev = ring->adev;
3723 	struct v9_mqd *mqd = ring->mqd_ptr;
3724 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3725 
3726 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3727 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3728 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3729 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3730 		mutex_lock(&adev->srbm_mutex);
3731 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3732 		gfx_v9_0_mqd_init(ring);
3733 		soc15_grbm_select(adev, 0, 0, 0, 0);
3734 		mutex_unlock(&adev->srbm_mutex);
3735 
3736 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3737 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3738 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3739 		/* reset MQD to a clean status */
3740 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3741 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3742 
3743 		/* reset ring buffer */
3744 		ring->wptr = 0;
3745 		amdgpu_ring_clear_ring(ring);
3746 	} else {
3747 		amdgpu_ring_clear_ring(ring);
3748 	}
3749 
3750 	return 0;
3751 }
3752 
3753 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3754 {
3755 	struct amdgpu_ring *ring;
3756 	int r;
3757 
3758 	ring = &adev->gfx.kiq.ring;
3759 
3760 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3761 	if (unlikely(r != 0))
3762 		return r;
3763 
3764 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3765 	if (unlikely(r != 0)) {
3766 		amdgpu_bo_unreserve(ring->mqd_obj);
		return r;
	}
3767 
3768 	gfx_v9_0_kiq_init_queue(ring);
3769 	amdgpu_bo_kunmap(ring->mqd_obj);
3770 	ring->mqd_ptr = NULL;
3771 	amdgpu_bo_unreserve(ring->mqd_obj);
3772 	ring->sched.ready = true;
3773 	return 0;
3774 }
3775 
3776 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3777 {
3778 	struct amdgpu_ring *ring = NULL;
3779 	int r = 0, i;
3780 
3781 	gfx_v9_0_cp_compute_enable(adev, true);
3782 
3783 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3784 		ring = &adev->gfx.compute_ring[i];
3785 
3786 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3787 		if (unlikely(r != 0))
3788 			goto done;
3789 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3790 		if (!r) {
3791 			r = gfx_v9_0_kcq_init_queue(ring);
3792 			amdgpu_bo_kunmap(ring->mqd_obj);
3793 			ring->mqd_ptr = NULL;
3794 		}
3795 		amdgpu_bo_unreserve(ring->mqd_obj);
3796 		if (r)
3797 			goto done;
3798 	}
3799 
3800 	r = gfx_v9_0_kiq_kcq_enable(adev);
3801 done:
3802 	return r;
3803 }
3804 
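/*
 * Resume the whole command processor: load microcode when PSP is not
 * doing it, bring up the KIQ, the gfx ring (except on Arcturus) and the
 * compute rings, then ring-test everything.
 */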
3805 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3806 {
3807 	int r, i;
3808 	struct amdgpu_ring *ring;
3809 
3810 	if (!(adev->flags & AMD_IS_APU))
3811 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3812 
3813 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3814 		if (adev->asic_type != CHIP_ARCTURUS) {
3815 			/* legacy firmware loading */
3816 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3817 			if (r)
3818 				return r;
3819 		}
3820 
3821 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3822 		if (r)
3823 			return r;
3824 	}
3825 
3826 	r = gfx_v9_0_kiq_resume(adev);
3827 	if (r)
3828 		return r;
3829 
3830 	if (adev->asic_type != CHIP_ARCTURUS) {
3831 		r = gfx_v9_0_cp_gfx_resume(adev);
3832 		if (r)
3833 			return r;
3834 	}
3835 
3836 	r = gfx_v9_0_kcq_resume(adev);
3837 	if (r)
3838 		return r;
3839 
3840 	if (adev->asic_type != CHIP_ARCTURUS) {
3841 		ring = &adev->gfx.gfx_ring[0];
3842 		r = amdgpu_ring_test_helper(ring);
3843 		if (r)
3844 			return r;
3845 	}
3846 
3847 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3848 		ring = &adev->gfx.compute_ring[i];
3849 		amdgpu_ring_test_helper(ring);
3850 	}
3851 
3852 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3853 
3854 	return 0;
3855 }
3856 
3857 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3858 {
3859 	if (adev->asic_type != CHIP_ARCTURUS)
3860 		gfx_v9_0_cp_gfx_enable(adev, enable);
3861 	gfx_v9_0_cp_compute_enable(adev, enable);
3862 }
3863 
3864 static int gfx_v9_0_hw_init(void *handle)
3865 {
3866 	int r;
3867 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868 
3869 	if (!amdgpu_sriov_vf(adev))
3870 		gfx_v9_0_init_golden_registers(adev);
3871 
3872 	gfx_v9_0_constants_init(adev);
3873 
3874 	r = gfx_v9_0_csb_vram_pin(adev);
3875 	if (r)
3876 		return r;
3877 
3878 	r = adev->gfx.rlc.funcs->resume(adev);
3879 	if (r)
3880 		return r;
3881 
3882 	r = gfx_v9_0_cp_resume(adev);
3883 	if (r)
3884 		return r;
3885 
3886 	if (adev->asic_type != CHIP_ARCTURUS) {
3887 		r = gfx_v9_0_ngg_en(adev);
3888 		if (r)
3889 			return r;
3890 	}
3891 
3892 	return r;
3893 }
3894 
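/*
 * Unmap all compute queues through the KIQ: one UNMAP_QUEUES packet
 * with the RESET_QUEUES action per compute ring.
 */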
3895 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3896 {
3897 	int r, i;
3898 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3899 
3900 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3901 	if (r)
3902 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3903 
3904 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3905 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3906 
3907 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3908 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3909 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3910 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3911 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3912 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3913 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3914 		amdgpu_ring_write(kiq_ring, 0);
3915 		amdgpu_ring_write(kiq_ring, 0);
3916 		amdgpu_ring_write(kiq_ring, 0);
3917 	}
3918 	r = amdgpu_ring_test_helper(kiq_ring);
3919 	if (r)
3920 		DRM_ERROR("KCQ disable failed\n");
3921 
3922 	return r;
3923 }
3924 
3925 static int gfx_v9_0_hw_fini(void *handle)
3926 {
3927 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3928 
3929 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3930 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3931 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3932 
3933 	/* disable the KCQs so the CPC stops touching memory that is no longer valid */
3934 	gfx_v9_0_kcq_disable(adev);
3935 
3936 	if (amdgpu_sriov_vf(adev)) {
3937 		gfx_v9_0_cp_gfx_enable(adev, false);
3938 		/* For SRIOV, polling must be disabled once the hardware is done;
3939 		 * otherwise the CPC engine keeps fetching the writeback address,
3940 		 * which is no longer valid after software teardown, and triggers
3941 		 * DMAR read errors on the hypervisor side.
3942 		 */
3943 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3944 		return 0;
3945 	}
3946 
3947 	/* Use the deinitialize sequence from CAIL when unbinding the device
3948 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3949 	 */
3950 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3951 		mutex_lock(&adev->srbm_mutex);
3952 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3953 				adev->gfx.kiq.ring.pipe,
3954 				adev->gfx.kiq.ring.queue, 0);
3955 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3956 		soc15_grbm_select(adev, 0, 0, 0, 0);
3957 		mutex_unlock(&adev->srbm_mutex);
3958 	}
3959 
3960 	gfx_v9_0_cp_enable(adev, false);
3961 	adev->gfx.rlc.funcs->stop(adev);
3962 
3963 	gfx_v9_0_csb_vram_unpin(adev);
3964 
3965 	return 0;
3966 }
3967 
3968 static int gfx_v9_0_suspend(void *handle)
3969 {
3970 	return gfx_v9_0_hw_fini(handle);
3971 }
3972 
3973 static int gfx_v9_0_resume(void *handle)
3974 {
3975 	return gfx_v9_0_hw_init(handle);
3976 }
3977 
3978 static bool gfx_v9_0_is_idle(void *handle)
3979 {
3980 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3981 
3982 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3983 				GRBM_STATUS, GUI_ACTIVE))
3984 		return false;
3985 	else
3986 		return true;
3987 }
3988 
3989 static int gfx_v9_0_wait_for_idle(void *handle)
3990 {
3991 	unsigned i;
3992 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3993 
3994 	for (i = 0; i < adev->usec_timeout; i++) {
3995 		if (gfx_v9_0_is_idle(handle))
3996 			return 0;
3997 		udelay(1);
3998 	}
3999 	return -ETIMEDOUT;
4000 }
4001 
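/*
 * Check the GRBM status registers and, if CP, GFX or RLC blocks are
 * still busy, halt them and pulse the corresponding GRBM soft-reset
 * bits.
 */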
4002 static int gfx_v9_0_soft_reset(void *handle)
4003 {
4004 	u32 grbm_soft_reset = 0;
4005 	u32 tmp;
4006 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4007 
4008 	/* GRBM_STATUS */
4009 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4010 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4011 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4012 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4013 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4014 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4015 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4016 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4017 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4018 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4019 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4020 	}
4021 
4022 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4023 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4024 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4025 	}
4026 
4027 	/* GRBM_STATUS2 */
4028 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4029 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4030 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4031 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4032 
4033 
4034 	if (grbm_soft_reset) {
4035 		/* stop the rlc */
4036 		adev->gfx.rlc.funcs->stop(adev);
4037 
4038 		if (adev->asic_type != CHIP_ARCTURUS)
4039 			/* Disable GFX parsing/prefetching */
4040 			gfx_v9_0_cp_gfx_enable(adev, false);
4041 
4042 		/* Disable MEC parsing/prefetching */
4043 		gfx_v9_0_cp_compute_enable(adev, false);
4044 
4045 		if (grbm_soft_reset) {
4046 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4047 			tmp |= grbm_soft_reset;
4048 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4049 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4050 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4051 
4052 			udelay(50);
4053 
4054 			tmp &= ~grbm_soft_reset;
4055 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4056 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4057 		}
4058 
4059 		/* Wait a little for things to settle down */
4060 		udelay(50);
4061 	}
4062 	return 0;
4063 }
4064 
4065 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4066 {
4067 	uint64_t clock;
4068 
4069 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4070 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4071 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4072 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4073 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4074 	return clock;
4075 }
4076 
4077 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4078 					  uint32_t vmid,
4079 					  uint32_t gds_base, uint32_t gds_size,
4080 					  uint32_t gws_base, uint32_t gws_size,
4081 					  uint32_t oa_base, uint32_t oa_size)
4082 {
4083 	struct amdgpu_device *adev = ring->adev;
4084 
4085 	/* GDS Base */
4086 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4087 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4088 				   gds_base);
4089 
4090 	/* GDS Size */
4091 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4092 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4093 				   gds_size);
4094 
4095 	/* GWS */
4096 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4097 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4098 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4099 
4100 	/* OA */
4101 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4102 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4103 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4104 }
4105 
4106 static const u32 vgpr_init_compute_shader[] =
4107 {
4108 	0xb07c0000, 0xbe8000ff,
4109 	0x000000f8, 0xbf110800,
4110 	0x7e000280, 0x7e020280,
4111 	0x7e040280, 0x7e060280,
4112 	0x7e080280, 0x7e0a0280,
4113 	0x7e0c0280, 0x7e0e0280,
4114 	0x80808800, 0xbe803200,
4115 	0xbf84fff5, 0xbf9c0000,
4116 	0xd28c0001, 0x0001007f,
4117 	0xd28d0001, 0x0002027e,
4118 	0x10020288, 0xb8810904,
4119 	0xb7814000, 0xd1196a01,
4120 	0x00000301, 0xbe800087,
4121 	0xbefc00c1, 0xd89c4000,
4122 	0x00020201, 0xd89cc080,
4123 	0x00040401, 0x320202ff,
4124 	0x00000800, 0x80808100,
4125 	0xbf84fff8, 0x7e020280,
4126 	0xbf810000, 0x00000000,
4127 };
4128 
4129 static const u32 sgpr_init_compute_shader[] =
4130 {
4131 	0xb07c0000, 0xbe8000ff,
4132 	0x0000005f, 0xbee50080,
4133 	0xbe812c65, 0xbe822c65,
4134 	0xbe832c65, 0xbe842c65,
4135 	0xbe852c65, 0xb77c0005,
4136 	0x80808500, 0xbf84fff8,
4137 	0xbe800080, 0xbf810000,
4138 };
4139 
4140 static const struct soc15_reg_entry vgpr_init_regs[] = {
4141    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4151 };
4152 
4153 static const struct soc15_reg_entry sgpr_init_regs[] = {
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4164 };
4165 
4166 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4167    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4168    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4169    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4173    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4174    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4175    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4176    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4177    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4178    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4179    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4180    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4181    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4183    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4185    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4186    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4187    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4188    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4189    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4194    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4195    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4196    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4198    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4199 };
4200 
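/*
 * GDS ECC workaround: issue a DMA_DATA write that covers the whole GDS
 * aperture so its EDC state is initialized before RAS starts counting
 * errors.
 */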
4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4202 {
4203 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4204 	int i, r;
4205 
4206 	r = amdgpu_ring_alloc(ring, 7);
4207 	if (r) {
4208 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4209 			ring->name, r);
4210 		return r;
4211 	}
4212 
4213 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4214 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4215 
4216 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4217 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4218 				PACKET3_DMA_DATA_DST_SEL(1) |
4219 				PACKET3_DMA_DATA_SRC_SEL(2) |
4220 				PACKET3_DMA_DATA_ENGINE(0)));
4221 	amdgpu_ring_write(ring, 0);
4222 	amdgpu_ring_write(ring, 0);
4223 	amdgpu_ring_write(ring, 0);
4224 	amdgpu_ring_write(ring, 0);
4225 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4226 				adev->gds.gds_size);
4227 
4228 	amdgpu_ring_commit(ring);
4229 
4230 	for (i = 0; i < adev->usec_timeout; i++) {
4231 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4232 			break;
4233 		udelay(1);
4234 	}
4235 
4236 	if (i >= adev->usec_timeout)
4237 		r = -ETIMEDOUT;
4238 
4239 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4240 
4241 	return r;
4242 }
4243 
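/*
 * GPR ECC workaround: build an IB that dispatches two small compute
 * shaders touching all VGPRs and SGPRs so the register-file ECC state
 * is initialized, then read back the EDC counters to clear them.
 */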
4244 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4245 {
4246 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4247 	struct amdgpu_ib ib;
4248 	struct dma_fence *f = NULL;
4249 	int r, i, j, k;
4250 	unsigned total_size, vgpr_offset, sgpr_offset;
4251 	u64 gpu_addr;
4252 
4253 	/* only supported when RAS is enabled */
4254 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4255 		return 0;
4256 
4257 	/* bail if the compute ring is not ready */
4258 	if (!ring->sched.ready)
4259 		return 0;
4260 
4261 	total_size =
4262 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4263 	total_size +=
4264 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4265 	total_size = ALIGN(total_size, 256);
4266 	vgpr_offset = total_size;
4267 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4268 	sgpr_offset = total_size;
4269 	total_size += sizeof(sgpr_init_compute_shader);
4270 
4271 	/* allocate an indirect buffer to put the commands in */
4272 	memset(&ib, 0, sizeof(ib));
4273 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4274 	if (r) {
4275 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4276 		return r;
4277 	}
4278 
4279 	/* load the compute shaders */
4280 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4281 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4282 
4283 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4284 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4285 
4286 	/* init the ib length to 0 */
4287 	ib.length_dw = 0;
4288 
4289 	/* VGPR */
4290 	/* write the register state for the compute dispatch */
4291 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4292 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4293 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4294 								- PACKET3_SET_SH_REG_START;
4295 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4296 	}
4297 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4298 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4299 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4300 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4301 							- PACKET3_SET_SH_REG_START;
4302 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4303 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4304 
4305 	/* write dispatch packet */
4306 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4307 	ib.ptr[ib.length_dw++] = 128; /* x */
4308 	ib.ptr[ib.length_dw++] = 1; /* y */
4309 	ib.ptr[ib.length_dw++] = 1; /* z */
4310 	ib.ptr[ib.length_dw++] =
4311 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4312 
4313 	/* write CS partial flush packet */
4314 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4315 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4316 
4317 	/* SGPR */
4318 	/* write the register state for the compute dispatch */
4319 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4320 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4321 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4322 								- PACKET3_SET_SH_REG_START;
4323 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4324 	}
4325 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4326 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4327 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4328 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4329 							- PACKET3_SET_SH_REG_START;
4330 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4331 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4332 
4333 	/* write dispatch packet */
4334 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4335 	ib.ptr[ib.length_dw++] = 128; /* x */
4336 	ib.ptr[ib.length_dw++] = 1; /* y */
4337 	ib.ptr[ib.length_dw++] = 1; /* z */
4338 	ib.ptr[ib.length_dw++] =
4339 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4340 
4341 	/* write CS partial flush packet */
4342 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4343 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4344 
4345 	/* schedule the ib on the ring */
4346 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4347 	if (r) {
4348 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4349 		goto fail;
4350 	}
4351 
4352 	/* wait for the GPU to finish processing the IB */
4353 	r = dma_fence_wait(f, false);
4354 	if (r) {
4355 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4356 		goto fail;
4357 	}
4358 
4359 	/* read back registers to clear the counters */
4360 	mutex_lock(&adev->grbm_idx_mutex);
4361 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4362 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4363 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4364 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4365 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4366 			}
4367 		}
4368 	}
4369 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4370 	mutex_unlock(&adev->grbm_idx_mutex);
4371 
4372 fail:
4373 	amdgpu_ib_free(adev, &ib, NULL);
4374 	dma_fence_put(f);
4375 
4376 	return r;
4377 }
4378 
4379 static int gfx_v9_0_early_init(void *handle)
4380 {
4381 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4382 
4383 	if (adev->asic_type == CHIP_ARCTURUS)
4384 		adev->gfx.num_gfx_rings = 0;
4385 	else
4386 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4387 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4388 	gfx_v9_0_set_ring_funcs(adev);
4389 	gfx_v9_0_set_irq_funcs(adev);
4390 	gfx_v9_0_set_gds_init(adev);
4391 	gfx_v9_0_set_rlc_funcs(adev);
4392 
4393 	return 0;
4394 }
4395 
4396 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4397 		struct ras_err_data *err_data,
4398 		struct amdgpu_iv_entry *entry);
4399 
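/*
 * Late ECC init: run the GDS and GPR EDC workarounds, then register the GFX
 * RAS block (feature enable, interrupt handler, debugfs/sysfs nodes) and
 * finally enable the CP ECC error interrupt.  On the resume path only the
 * feature enable and the interrupt are re-armed.
 */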
4400 static int gfx_v9_0_ecc_late_init(void *handle)
4401 {
4402 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4403 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4404 	struct ras_ih_if ih_info = {
4405 		.cb = gfx_v9_0_process_ras_data_cb,
4406 	};
4407 	struct ras_fs_if fs_info = {
4408 		.sysfs_name = "gfx_err_count",
4409 		.debugfs_name = "gfx_err_inject",
4410 	};
4411 	struct ras_common_if ras_block = {
4412 		.block = AMDGPU_RAS_BLOCK__GFX,
4413 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4414 		.sub_block_index = 0,
4415 		.name = "gfx",
4416 	};
4417 	int r;
4418 
4419 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4420 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4421 		return 0;
4422 	}
4423 
4424 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4425 	if (r)
4426 		return r;
4427 
4428 	/* requires IBs so do in late init after IB pool is initialized */
4429 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4430 	if (r)
4431 		return r;
4432 
4433 	/* handle resume path. */
4434 	if (*ras_if) {
4435 		/* resend ras TA enable cmd during resume.
4436 		 * prepare to handle failure.
4437 		 */
4438 		ih_info.head = **ras_if;
4439 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4440 		if (r) {
4441 			if (r == -EAGAIN) {
4442 				/* request a gpu reset. will run again. */
4443 				amdgpu_ras_request_reset_on_boot(adev,
4444 						AMDGPU_RAS_BLOCK__GFX);
4445 				return 0;
4446 			}
4447 			/* fail to enable ras, cleanup all. */
4448 			goto irq;
4449 		}
4450 		/* enable successfully. continue. */
4451 		goto resume;
4452 	}
4453 
4454 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4455 	if (!*ras_if)
4456 		return -ENOMEM;
4457 
4458 	**ras_if = ras_block;
4459 
4460 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4461 	if (r) {
4462 		if (r == -EAGAIN) {
4463 			amdgpu_ras_request_reset_on_boot(adev,
4464 					AMDGPU_RAS_BLOCK__GFX);
4465 			r = 0;
4466 		}
4467 		goto feature;
4468 	}
4469 
4470 	ih_info.head = **ras_if;
4471 	fs_info.head = **ras_if;
4472 
4473 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4474 	if (r)
4475 		goto interrupt;
4476 
4477 	amdgpu_ras_debugfs_create(adev, &fs_info);
4478 
4479 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4480 	if (r)
4481 		goto sysfs;
4482 resume:
4483 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4484 	if (r)
4485 		goto irq;
4486 
4487 	return 0;
4488 irq:
4489 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4490 sysfs:
4491 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4492 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4493 interrupt:
4494 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4495 feature:
4496 	kfree(*ras_if);
4497 	*ras_if = NULL;
4498 	return r;
4499 }
4500 
4501 static int gfx_v9_0_late_init(void *handle)
4502 {
4503 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4504 	int r;
4505 
4506 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4507 	if (r)
4508 		return r;
4509 
4510 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4511 	if (r)
4512 		return r;
4513 
4514 	r = gfx_v9_0_ecc_late_init(handle);
4515 	if (r)
4516 		return r;
4517 
4518 	return 0;
4519 }
4520 
4521 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4522 {
4523 	uint32_t rlc_setting;
4524 
4525 	/* if RLC is not enabled, do nothing */
4526 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4527 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4528 		return false;
4529 
4530 	return true;
4531 }
4532 
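/*
 * Ask the RLC firmware to enter safe mode: set CMD together with MESSAGE = 1
 * in mmRLC_SAFE_MODE, then poll until the firmware acknowledges the request
 * by clearing CMD or adev->usec_timeout expires.
 */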
4533 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4534 {
4535 	uint32_t data;
4536 	unsigned i;
4537 
4538 	data = RLC_SAFE_MODE__CMD_MASK;
4539 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4540 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4541 
4542 	/* wait for RLC_SAFE_MODE */
4543 	for (i = 0; i < adev->usec_timeout; i++) {
4544 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4545 			break;
4546 		udelay(1);
4547 	}
4548 }
4549 
4550 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4551 {
4552 	uint32_t data;
4553 
4554 	data = RLC_SAFE_MODE__CMD_MASK;
4555 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4556 }
4557 
4558 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4559 						bool enable)
4560 {
4561 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4562 
4563 	if (is_support_sw_smu(adev) && !enable)
4564 		smu_set_gfx_cgpg(&adev->smu, enable);
4565 
4566 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4567 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4568 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4569 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4570 	} else {
4571 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4572 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4573 	}
4574 
4575 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4576 }
4577 
4578 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4579 						bool enable)
4580 {
4581 	/* TODO: double check if we need to perform under safe mode */
4582 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4583 
4584 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4585 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4586 	else
4587 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4588 
4589 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4590 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4591 	else
4592 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4593 
4594 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4595 }
4596 
4597 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4598 						      bool enable)
4599 {
4600 	uint32_t data, def;
4601 
4602 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4603 
4604 	/* It is disabled by HW by default */
4605 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4606 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4607 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4608 
4609 		if (adev->asic_type != CHIP_VEGA12)
4610 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4611 
4612 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4613 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4614 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4615 
4616 		/* only for Vega10 & Raven1 */
4617 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4618 
4619 		if (def != data)
4620 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4621 
4622 		/* MGLS is a global flag to control all MGLS in GFX */
4623 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4624 			/* 2 - RLC memory Light sleep */
4625 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4626 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4627 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4628 				if (def != data)
4629 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4630 			}
4631 			/* 3 - CP memory Light sleep */
4632 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4633 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4634 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4635 				if (def != data)
4636 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4637 			}
4638 		}
4639 	} else {
4640 		/* 1 - MGCG_OVERRIDE */
4641 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4642 
4643 		if (adev->asic_type != CHIP_VEGA12)
4644 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4645 
4646 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4647 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4648 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4649 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4650 
4651 		if (def != data)
4652 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4653 
4654 		/* 2 - disable MGLS in RLC */
4655 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4656 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4657 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4658 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4659 		}
4660 
4661 		/* 3 - disable MGLS in CP */
4662 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4663 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4664 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4665 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4666 		}
4667 	}
4668 
4669 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4670 }
4671 
4672 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4673 					   bool enable)
4674 {
4675 	uint32_t data, def;
4676 
4677 	if (adev->asic_type == CHIP_ARCTURUS)
4678 		return;
4679 
4680 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4681 
4682 	/* Enable 3D CGCG/CGLS */
4683 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4684 		/* write cmd to clear cgcg/cgls ov */
4685 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4686 		/* unset CGCG override */
4687 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4688 		/* update CGCG and CGLS override bits */
4689 		if (def != data)
4690 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4691 
4692 		/* enable 3Dcgcg FSM(0x0000363f) */
4693 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4694 
4695 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4696 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4697 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4698 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4699 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4700 		if (def != data)
4701 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4702 
4703 		/* set IDLE_POLL_COUNT(0x00900100) */
4704 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4705 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4706 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4707 		if (def != data)
4708 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4709 	} else {
4710 		/* Disable CGCG/CGLS */
4711 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4712 		/* disable cgcg, cgls should be disabled */
4713 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4714 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4715 		/* disable cgcg and cgls in FSM */
4716 		if (def != data)
4717 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4718 	}
4719 
4720 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4721 }
4722 
4723 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4724 						      bool enable)
4725 {
4726 	uint32_t def, data;
4727 
4728 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4729 
4730 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4731 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4732 		/* unset CGCG override */
4733 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4734 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4735 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4736 		else
4737 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4738 		/* update CGCG and CGLS override bits */
4739 		if (def != data)
4740 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4741 
4742 		/* enable cgcg FSM(0x0000363F) */
4743 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4744 
4745 		if (adev->asic_type == CHIP_ARCTURUS)
4746 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4747 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4748 		else
4749 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4750 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4751 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4752 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4753 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4754 		if (def != data)
4755 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4756 
4757 		/* set IDLE_POLL_COUNT(0x00900100) */
4758 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4759 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4760 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4761 		if (def != data)
4762 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4763 	} else {
4764 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4765 		/* reset CGCG/CGLS bits */
4766 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4767 		/* disable cgcg and cgls in FSM */
4768 		if (def != data)
4769 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4770 	}
4771 
4772 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4773 }
4774 
4775 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4776 					    bool enable)
4777 {
4778 	if (enable) {
4779 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4780 		 * ===  MGCG + MGLS ===
4781 		 */
4782 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4783 		/* ===  CGCG /CGLS for GFX 3D Only === */
4784 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4785 		/* ===  CGCG + CGLS === */
4786 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4787 	} else {
4788 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4789 		 * ===  CGCG + CGLS ===
4790 		 */
4791 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4792 		/* ===  CGCG /CGLS for GFX 3D Only === */
4793 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4794 		/* ===  MGCG + MGLS === */
4795 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4796 	}
4797 	return 0;
4798 }
4799 
4800 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4801 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4802 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4803 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4804 	.init = gfx_v9_0_rlc_init,
4805 	.get_csb_size = gfx_v9_0_get_csb_size,
4806 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4807 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4808 	.resume = gfx_v9_0_rlc_resume,
4809 	.stop = gfx_v9_0_rlc_stop,
4810 	.reset = gfx_v9_0_rlc_reset,
4811 	.start = gfx_v9_0_rlc_start
4812 };
4813 
4814 static int gfx_v9_0_set_powergating_state(void *handle,
4815 					  enum amd_powergating_state state)
4816 {
4817 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4818 	bool enable = (state == AMD_PG_STATE_GATE);
4819 
4820 	switch (adev->asic_type) {
4821 	case CHIP_RAVEN:
4822 	case CHIP_RENOIR:
4823 		if (!enable) {
4824 			amdgpu_gfx_off_ctrl(adev, false);
4825 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4826 		}
4827 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4828 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4829 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4830 		} else {
4831 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4832 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4833 		}
4834 
4835 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4836 			gfx_v9_0_enable_cp_power_gating(adev, true);
4837 		else
4838 			gfx_v9_0_enable_cp_power_gating(adev, false);
4839 
4840 		/* update gfx cgpg state */
4841 		if (is_support_sw_smu(adev) && enable)
4842 			smu_set_gfx_cgpg(&adev->smu, enable);
4843 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4844 
4845 		/* update mgcg state */
4846 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4847 
4848 		if (enable)
4849 			amdgpu_gfx_off_ctrl(adev, true);
4850 		break;
4851 	case CHIP_VEGA12:
4852 		if (!enable) {
4853 			amdgpu_gfx_off_ctrl(adev, false);
4854 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4855 		} else {
4856 			amdgpu_gfx_off_ctrl(adev, true);
4857 		}
4858 		break;
4859 	default:
4860 		break;
4861 	}
4862 
4863 	return 0;
4864 }
4865 
4866 static int gfx_v9_0_set_clockgating_state(void *handle,
4867 					  enum amd_clockgating_state state)
4868 {
4869 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4870 
4871 	if (amdgpu_sriov_vf(adev))
4872 		return 0;
4873 
4874 	switch (adev->asic_type) {
4875 	case CHIP_VEGA10:
4876 	case CHIP_VEGA12:
4877 	case CHIP_VEGA20:
4878 	case CHIP_RAVEN:
4879 	case CHIP_ARCTURUS:
4880 	case CHIP_RENOIR:
4881 		gfx_v9_0_update_gfx_clock_gating(adev,
4882 						 state == AMD_CG_STATE_GATE);
4883 		break;
4884 	default:
4885 		break;
4886 	}
4887 	return 0;
4888 }
4889 
4890 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4891 {
4892 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4893 	int data;
4894 
4895 	if (amdgpu_sriov_vf(adev))
4896 		*flags = 0;
4897 
4898 	/* AMD_CG_SUPPORT_GFX_MGCG */
4899 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4900 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4901 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4902 
4903 	/* AMD_CG_SUPPORT_GFX_CGCG */
4904 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4905 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4906 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4907 
4908 	/* AMD_CG_SUPPORT_GFX_CGLS */
4909 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4910 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4911 
4912 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4913 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4914 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4915 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4916 
4917 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4918 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4919 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4920 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4921 
4922 	if (adev->asic_type != CHIP_ARCTURUS) {
4923 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4924 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4925 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4926 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4927 
4928 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4929 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4930 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4931 	}
4932 }
4933 
4934 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4935 {
4936 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4937 }
4938 
4939 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4940 {
4941 	struct amdgpu_device *adev = ring->adev;
4942 	u64 wptr;
4943 
4944 	/* XXX check if swapping is necessary on BE */
4945 	if (ring->use_doorbell) {
4946 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4947 	} else {
4948 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4949 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4950 	}
4951 
4952 	return wptr;
4953 }
4954 
4955 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4956 {
4957 	struct amdgpu_device *adev = ring->adev;
4958 
4959 	if (ring->use_doorbell) {
4960 		/* XXX check if swapping is necessary on BE */
4961 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4962 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4963 	} else {
4964 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4965 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4966 	}
4967 }
4968 
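/*
 * Flush HDP from the ring: pick the per-engine ref/mask bit from the NBIO
 * HDP flush registers (CP0 for gfx, CP2/CP6 shifted by pipe for MEC1/MEC2
 * compute rings) and emit a WAIT_REG_MEM that writes the flush request and
 * polls the done register until the bit is set.
 */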
4969 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4970 {
4971 	struct amdgpu_device *adev = ring->adev;
4972 	u32 ref_and_mask, reg_mem_engine;
4973 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4974 
4975 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4976 		switch (ring->me) {
4977 		case 1:
4978 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4979 			break;
4980 		case 2:
4981 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4982 			break;
4983 		default:
4984 			return;
4985 		}
4986 		reg_mem_engine = 0;
4987 	} else {
4988 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4989 		reg_mem_engine = 1; /* pfp */
4990 	}
4991 
4992 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4993 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4994 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4995 			      ref_and_mask, ref_and_mask, 0x20);
4996 }
4997 
4998 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4999 					struct amdgpu_job *job,
5000 					struct amdgpu_ib *ib,
5001 					uint32_t flags)
5002 {
5003 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5004 	u32 header, control = 0;
5005 
5006 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5007 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5008 	else
5009 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5010 
5011 	control |= ib->length_dw | (vmid << 24);
5012 
5013 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5014 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5015 
5016 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5017 			gfx_v9_0_ring_emit_de_meta(ring);
5018 	}
5019 
5020 	amdgpu_ring_write(ring, header);
5021 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5022 	amdgpu_ring_write(ring,
5023 #ifdef __BIG_ENDIAN
5024 		(2 << 0) |
5025 #endif
5026 		lower_32_bits(ib->gpu_addr));
5027 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5028 	amdgpu_ring_write(ring, control);
5029 }
5030 
5031 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5032 					  struct amdgpu_job *job,
5033 					  struct amdgpu_ib *ib,
5034 					  uint32_t flags)
5035 {
5036 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5037 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5038 
5039 	/* Currently, there is a high possibility to get wave ID mismatch
5040 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5041 	 * different wave IDs than the GDS expects. This situation happens
5042 	 * randomly when at least 5 compute pipes use GDS ordered append.
5043 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5044 	 * Those are probably bugs somewhere else in the kernel driver.
5045 	 *
5046 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5047 	 * GDS to 0 for this ring (me/pipe).
5048 	 */
5049 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5050 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5051 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5052 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5053 	}
5054 
5055 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5056 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5057 	amdgpu_ring_write(ring,
5058 #ifdef __BIG_ENDIAN
5059 				(2 << 0) |
5060 #endif
5061 				lower_32_bits(ib->gpu_addr));
5062 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5063 	amdgpu_ring_write(ring, control);
5064 }
5065 
5066 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5067 				     u64 seq, unsigned flags)
5068 {
5069 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5070 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5071 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5072 
5073 	/* RELEASE_MEM - flush caches, send int */
5074 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5075 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5076 					       EOP_TC_NC_ACTION_EN) :
5077 					      (EOP_TCL1_ACTION_EN |
5078 					       EOP_TC_ACTION_EN |
5079 					       EOP_TC_WB_ACTION_EN |
5080 					       EOP_TC_MD_ACTION_EN)) |
5081 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5082 				 EVENT_INDEX(5)));
5083 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5084 
5085 	/*
5086 	 * The address must be Qword aligned for a 64bit write and Dword
5087 	 * aligned if we only send the low 32 bits of data (the high bits are discarded).
5088 	 */
5089 	if (write64bit)
5090 		BUG_ON(addr & 0x7);
5091 	else
5092 		BUG_ON(addr & 0x3);
5093 	amdgpu_ring_write(ring, lower_32_bits(addr));
5094 	amdgpu_ring_write(ring, upper_32_bits(addr));
5095 	amdgpu_ring_write(ring, lower_32_bits(seq));
5096 	amdgpu_ring_write(ring, upper_32_bits(seq));
5097 	amdgpu_ring_write(ring, 0);
5098 }
5099 
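/*
 * Stall the ring (via WAIT_REG_MEM on its own fence address) until the most
 * recently synced fence sequence number has been signalled.
 */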
5100 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5101 {
5102 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5103 	uint32_t seq = ring->fence_drv.sync_seq;
5104 	uint64_t addr = ring->fence_drv.gpu_addr;
5105 
5106 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5107 			      lower_32_bits(addr), upper_32_bits(addr),
5108 			      seq, 0xffffffff, 4);
5109 }
5110 
5111 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5112 					unsigned vmid, uint64_t pd_addr)
5113 {
5114 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5115 
5116 	/* compute doesn't have PFP */
5117 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5118 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5119 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5120 		amdgpu_ring_write(ring, 0x0);
5121 	}
5122 }
5123 
5124 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5125 {
5126 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5127 }
5128 
5129 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5130 {
5131 	u64 wptr;
5132 
5133 	/* XXX check if swapping is necessary on BE */
5134 	if (ring->use_doorbell)
5135 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5136 	else
5137 		BUG();
5138 	return wptr;
5139 }
5140 
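/*
 * Raise or throttle the wave launch percentage for this ring's pipe: the
 * VALUE field of the corresponding SPI_WCL_PIPE_PERCENT register is set to
 * its maximum while the pipe is acquired and to 1 otherwise.
 */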
5141 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5142 					   bool acquire)
5143 {
5144 	struct amdgpu_device *adev = ring->adev;
5145 	int pipe_num, tmp, reg;
5146 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5147 
5148 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5149 
5150 	/* first me only has 2 entries, GFX and HP3D */
5151 	if (ring->me > 0)
5152 		pipe_num -= 2;
5153 
5154 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5155 	tmp = RREG32(reg);
5156 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5157 	WREG32(reg, tmp);
5158 }
5159 
5160 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5161 					    struct amdgpu_ring *ring,
5162 					    bool acquire)
5163 {
5164 	int i, pipe;
5165 	bool reserve;
5166 	struct amdgpu_ring *iring;
5167 
5168 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5169 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5170 	if (acquire)
5171 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5172 	else
5173 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5174 
5175 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5176 		/* Clear all reservations - everyone reacquires all resources */
5177 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5178 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5179 						       true);
5180 
5181 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5182 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5183 						       true);
5184 	} else {
5185 		/* Lower all pipes without a current reservation */
5186 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5187 			iring = &adev->gfx.gfx_ring[i];
5188 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5189 							   iring->me,
5190 							   iring->pipe,
5191 							   0);
5192 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5193 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5194 		}
5195 
5196 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5197 			iring = &adev->gfx.compute_ring[i];
5198 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5199 							   iring->me,
5200 							   iring->pipe,
5201 							   0);
5202 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5203 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5204 		}
5205 	}
5206 
5207 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5208 }
5209 
5210 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5211 				      struct amdgpu_ring *ring,
5212 				      bool acquire)
5213 {
5214 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5215 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5216 
5217 	mutex_lock(&adev->srbm_mutex);
5218 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5219 
5220 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5221 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5222 
5223 	soc15_grbm_select(adev, 0, 0, 0, 0);
5224 	mutex_unlock(&adev->srbm_mutex);
5225 }
5226 
5227 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5228 					       enum drm_sched_priority priority)
5229 {
5230 	struct amdgpu_device *adev = ring->adev;
5231 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5232 
5233 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5234 		return;
5235 
5236 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5237 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5238 }
5239 
5240 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5241 {
5242 	struct amdgpu_device *adev = ring->adev;
5243 
5244 	/* XXX check if swapping is necessary on BE */
5245 	if (ring->use_doorbell) {
5246 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5247 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5248 	} else {
5249 		BUG(); /* only DOORBELL method supported on gfx9 now */
5250 	}
5251 }
5252 
5253 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5254 					 u64 seq, unsigned int flags)
5255 {
5256 	struct amdgpu_device *adev = ring->adev;
5257 
5258 	/* we only allocate 32bit for each seq wb address */
5259 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5260 
5261 	/* write fence seq to the "addr" */
5262 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5263 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5264 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5265 	amdgpu_ring_write(ring, lower_32_bits(addr));
5266 	amdgpu_ring_write(ring, upper_32_bits(addr));
5267 	amdgpu_ring_write(ring, lower_32_bits(seq));
5268 
5269 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5270 		/* set register to trigger INT */
5271 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5272 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5273 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5274 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5275 		amdgpu_ring_write(ring, 0);
5276 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5277 	}
5278 }
5279 
5280 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5281 {
5282 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5283 	amdgpu_ring_write(ring, 0);
5284 }
5285 
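/*
 * Write a zero-initialized v9_ce_ib_state into the ce_payload slot of the
 * per-context save area (CSA) with a WRITE_DATA packet; only emitted under
 * SR-IOV (see gfx_v9_ring_emit_cntxcntl).
 */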
5286 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5287 {
5288 	struct v9_ce_ib_state ce_payload = {0};
5289 	uint64_t csa_addr;
5290 	int cnt;
5291 
5292 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5293 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5294 
5295 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5296 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5297 				 WRITE_DATA_DST_SEL(8) |
5298 				 WR_CONFIRM) |
5299 				 WRITE_DATA_CACHE_POLICY(0));
5300 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5301 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5302 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5303 }
5304 
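/*
 * Write a zero-initialized v9_de_ib_state into the de_payload slot of the
 * CSA, pointing the GDS backup address 4KB past the CSA base; emitted for
 * preemptible gfx IBs under SR-IOV (see gfx_v9_0_ring_emit_ib_gfx).
 */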
5305 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5306 {
5307 	struct v9_de_ib_state de_payload = {0};
5308 	uint64_t csa_addr, gds_addr;
5309 	int cnt;
5310 
5311 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5312 	gds_addr = csa_addr + 4096;
5313 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5314 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5315 
5316 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5317 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5318 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5319 				 WRITE_DATA_DST_SEL(8) |
5320 				 WR_CONFIRM) |
5321 				 WRITE_DATA_CACHE_POLICY(0));
5322 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5323 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5324 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5325 }
5326 
5327 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5328 {
5329 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5330 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5331 }
5332 
5333 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5334 {
5335 	uint32_t dw2 = 0;
5336 
5337 	if (amdgpu_sriov_vf(ring->adev))
5338 		gfx_v9_0_ring_emit_ce_meta(ring);
5339 
5340 	gfx_v9_0_ring_emit_tmz(ring, true);
5341 
5342 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5343 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5344 		/* set load_global_config & load_global_uconfig */
5345 		dw2 |= 0x8001;
5346 		/* set load_cs_sh_regs */
5347 		dw2 |= 0x01000000;
5348 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5349 		dw2 |= 0x10002;
5350 
5351 		/* set load_ce_ram if preamble presented */
5352 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5353 			dw2 |= 0x10000000;
5354 	} else {
5355 		/* still load_ce_ram if this is the first time preamble presented
5356 		 * although there is no context switch happens.
5357 		 */
5358 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5359 			dw2 |= 0x10000000;
5360 	}
5361 
5362 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5363 	amdgpu_ring_write(ring, dw2);
5364 	amdgpu_ring_write(ring, 0);
5365 }
5366 
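/*
 * Emit a COND_EXEC packet referencing cond_exe_gpu_addr with a dummy DW
 * count (0x55aa55aa) and return its ring offset so that
 * gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real count later.
 */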
5367 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5368 {
5369 	unsigned ret;
5370 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5371 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5372 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5373 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5374 	ret = ring->wptr & ring->buf_mask;
5375 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5376 	return ret;
5377 }
5378 
5379 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
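/*
 * Patch the dummy DW count written by gfx_v9_0_ring_emit_init_cond_exec()
 * with the number of DWs between the placeholder and the current wptr,
 * accounting for ring buffer wrap-around.
 */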
5380 {
5381 	unsigned cur;
5382 	BUG_ON(offset > ring->buf_mask);
5383 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5384 
5385 	cur = (ring->wptr & ring->buf_mask) - 1;
5386 	if (likely(cur > offset))
5387 		ring->ring[offset] = cur - offset;
5388 	else
5389 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5390 }
5391 
5392 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5393 {
5394 	struct amdgpu_device *adev = ring->adev;
5395 
5396 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5397 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5398 				(5 << 8) |	/* dst: memory */
5399 				(1 << 20));	/* write confirm */
5400 	amdgpu_ring_write(ring, reg);
5401 	amdgpu_ring_write(ring, 0);
5402 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5403 				adev->virt.reg_val_offs * 4));
5404 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5405 				adev->virt.reg_val_offs * 4));
5406 }
5407 
5408 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5409 				    uint32_t val)
5410 {
5411 	uint32_t cmd = 0;
5412 
5413 	switch (ring->funcs->type) {
5414 	case AMDGPU_RING_TYPE_GFX:
5415 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5416 		break;
5417 	case AMDGPU_RING_TYPE_KIQ:
5418 		cmd = (1 << 16); /* no inc addr */
5419 		break;
5420 	default:
5421 		cmd = WR_CONFIRM;
5422 		break;
5423 	}
5424 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5425 	amdgpu_ring_write(ring, cmd);
5426 	amdgpu_ring_write(ring, reg);
5427 	amdgpu_ring_write(ring, 0);
5428 	amdgpu_ring_write(ring, val);
5429 }
5430 
5431 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5432 					uint32_t val, uint32_t mask)
5433 {
5434 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5435 }
5436 
5437 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5438 						  uint32_t reg0, uint32_t reg1,
5439 						  uint32_t ref, uint32_t mask)
5440 {
5441 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5442 	struct amdgpu_device *adev = ring->adev;
5443 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5444 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5445 
5446 	if (fw_version_ok)
5447 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5448 				      ref, mask, 0x20);
5449 	else
5450 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5451 							   ref, mask);
5452 }
5453 
5454 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5455 {
5456 	struct amdgpu_device *adev = ring->adev;
5457 	uint32_t value = 0;
5458 
5459 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5460 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5461 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5462 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5463 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5464 }
5465 
5466 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5467 						 enum amdgpu_interrupt_state state)
5468 {
5469 	switch (state) {
5470 	case AMDGPU_IRQ_STATE_DISABLE:
5471 	case AMDGPU_IRQ_STATE_ENABLE:
5472 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5473 			       TIME_STAMP_INT_ENABLE,
5474 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5475 		break;
5476 	default:
5477 		break;
5478 	}
5479 }
5480 
5481 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5482 						     int me, int pipe,
5483 						     enum amdgpu_interrupt_state state)
5484 {
5485 	u32 mec_int_cntl, mec_int_cntl_reg;
5486 
5487 	/*
5488 	 * amdgpu controls only the first MEC. That's why this function only
5489 	 * handles the setting of interrupts for this specific MEC. All other
5490 	 * pipes' interrupts are set by amdkfd.
5491 	 */
5492 
5493 	if (me == 1) {
5494 		switch (pipe) {
5495 		case 0:
5496 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5497 			break;
5498 		case 1:
5499 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5500 			break;
5501 		case 2:
5502 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5503 			break;
5504 		case 3:
5505 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5506 			break;
5507 		default:
5508 			DRM_DEBUG("invalid pipe %d\n", pipe);
5509 			return;
5510 		}
5511 	} else {
5512 		DRM_DEBUG("invalid me %d\n", me);
5513 		return;
5514 	}
5515 
5516 	switch (state) {
5517 	case AMDGPU_IRQ_STATE_DISABLE:
5518 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5519 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5520 					     TIME_STAMP_INT_ENABLE, 0);
5521 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5522 		break;
5523 	case AMDGPU_IRQ_STATE_ENABLE:
5524 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5525 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5526 					     TIME_STAMP_INT_ENABLE, 1);
5527 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5528 		break;
5529 	default:
5530 		break;
5531 	}
5532 }
5533 
5534 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5535 					     struct amdgpu_irq_src *source,
5536 					     unsigned type,
5537 					     enum amdgpu_interrupt_state state)
5538 {
5539 	switch (state) {
5540 	case AMDGPU_IRQ_STATE_DISABLE:
5541 	case AMDGPU_IRQ_STATE_ENABLE:
5542 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5543 			       PRIV_REG_INT_ENABLE,
5544 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5545 		break;
5546 	default:
5547 		break;
5548 	}
5549 
5550 	return 0;
5551 }
5552 
5553 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5554 					      struct amdgpu_irq_src *source,
5555 					      unsigned type,
5556 					      enum amdgpu_interrupt_state state)
5557 {
5558 	switch (state) {
5559 	case AMDGPU_IRQ_STATE_DISABLE:
5560 	case AMDGPU_IRQ_STATE_ENABLE:
5561 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5562 			       PRIV_INSTR_INT_ENABLE,
5563 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5564 	default:
5565 		break;
5566 	}
5567 
5568 	return 0;
5569 }
5570 
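/*
 * Toggle the CP_ECC_ERROR_INT_ENABLE bit in the per-ME, per-pipe interrupt
 * control register; the register name is built by token pasting.
 */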
5571 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5572 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5573 			CP_ECC_ERROR_INT_ENABLE, 1)
5574 
5575 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5576 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5577 			CP_ECC_ERROR_INT_ENABLE, 0)
5578 
5579 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5580 					      struct amdgpu_irq_src *source,
5581 					      unsigned type,
5582 					      enum amdgpu_interrupt_state state)
5583 {
5584 	switch (state) {
5585 	case AMDGPU_IRQ_STATE_DISABLE:
5586 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5587 				CP_ECC_ERROR_INT_ENABLE, 0);
5588 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5589 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5590 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5591 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5592 		break;
5593 
5594 	case AMDGPU_IRQ_STATE_ENABLE:
5595 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5596 				CP_ECC_ERROR_INT_ENABLE, 1);
5597 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5598 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5599 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5600 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5601 		break;
5602 	default:
5603 		break;
5604 	}
5605 
5606 	return 0;
5607 }
5608 
5609 
5610 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5611 					    struct amdgpu_irq_src *src,
5612 					    unsigned type,
5613 					    enum amdgpu_interrupt_state state)
5614 {
5615 	switch (type) {
5616 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5617 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5618 		break;
5619 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5620 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5621 		break;
5622 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5623 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5624 		break;
5625 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5626 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5627 		break;
5628 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5629 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5630 		break;
5631 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5632 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5633 		break;
5634 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5635 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5636 		break;
5637 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5638 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5639 		break;
5640 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5641 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5642 		break;
5643 	default:
5644 		break;
5645 	}
5646 	return 0;
5647 }
5648 
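/*
 * The IV ring_id encodes the pipe in bits [1:0], the ME in bits [3:2] and
 * the queue in bits [6:4]; ME 0 is the gfx ring, ME 1/2 are compute.
 */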
5649 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5650 			    struct amdgpu_irq_src *source,
5651 			    struct amdgpu_iv_entry *entry)
5652 {
5653 	int i;
5654 	u8 me_id, pipe_id, queue_id;
5655 	struct amdgpu_ring *ring;
5656 
5657 	DRM_DEBUG("IH: CP EOP\n");
5658 	me_id = (entry->ring_id & 0x0c) >> 2;
5659 	pipe_id = (entry->ring_id & 0x03) >> 0;
5660 	queue_id = (entry->ring_id & 0x70) >> 4;
5661 
5662 	switch (me_id) {
5663 	case 0:
5664 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5665 		break;
5666 	case 1:
5667 	case 2:
5668 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5669 			ring = &adev->gfx.compute_ring[i];
5670 			/* Per-queue interrupt is supported for MEC starting from VI.
5671 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5672 			 */
5673 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5674 				amdgpu_fence_process(ring);
5675 		}
5676 		break;
5677 	}
5678 	return 0;
5679 }
5680 
5681 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5682 			   struct amdgpu_iv_entry *entry)
5683 {
5684 	u8 me_id, pipe_id, queue_id;
5685 	struct amdgpu_ring *ring;
5686 	int i;
5687 
5688 	me_id = (entry->ring_id & 0x0c) >> 2;
5689 	pipe_id = (entry->ring_id & 0x03) >> 0;
5690 	queue_id = (entry->ring_id & 0x70) >> 4;
5691 
5692 	switch (me_id) {
5693 	case 0:
5694 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5695 		break;
5696 	case 1:
5697 	case 2:
5698 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5699 			ring = &adev->gfx.compute_ring[i];
5700 			if (ring->me == me_id && ring->pipe == pipe_id &&
5701 			    ring->queue == queue_id)
5702 				drm_sched_fault(&ring->sched);
5703 		}
5704 		break;
5705 	}
5706 }
5707 
5708 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5709 				 struct amdgpu_irq_src *source,
5710 				 struct amdgpu_iv_entry *entry)
5711 {
5712 	DRM_ERROR("Illegal register access in command stream\n");
5713 	gfx_v9_0_fault(adev, entry);
5714 	return 0;
5715 }
5716 
5717 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5718 				  struct amdgpu_irq_src *source,
5719 				  struct amdgpu_iv_entry *entry)
5720 {
5721 	DRM_ERROR("Illegal instruction in command stream\n");
5722 	gfx_v9_0_fault(adev, entry);
5723 	return 0;
5724 }
5725 
5726 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5727 		struct ras_err_data *err_data,
5728 		struct amdgpu_iv_entry *entry)
5729 {
5730 	/* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5731 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5732 	if (adev->gfx.funcs->query_ras_error_count)
5733 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5734 	amdgpu_ras_reset_gpu(adev, 0);
5735 	return AMDGPU_RAS_SUCCESS;
5736 }
5737 
5738 static const struct {
5739 	const char *name;
5740 	uint32_t ip;
5741 	uint32_t inst;
5742 	uint32_t seg;
5743 	uint32_t reg_offset;
5744 	uint32_t per_se_instance;
5745 	int32_t num_instance;
5746 	uint32_t sec_count_mask;
5747 	uint32_t ded_count_mask;
5748 } gfx_ras_edc_regs[] = {
5749 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5750 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5751 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5752 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5753 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5754 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5755 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5756 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5757 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5758 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5759 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5760 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5761 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5762 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5763 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5764 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5765 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5766 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5767 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5768 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5769 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5770 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5771 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5772 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5773 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5774 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5775 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5776 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5777 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5778 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5779 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5780 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5781 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5782 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5783 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5784 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5785 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5786 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5787 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5788 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5789 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5790 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5791 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5792 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5793 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5794 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5795 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5796 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5797 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5798 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5799 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5800 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5801 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5802 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5803 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5804 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5805 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5806 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5807 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5808 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5809 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5810 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5811 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5812 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5813 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5814 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5815 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5816 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5817 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5818 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5819 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5820 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5821 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5822 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5823 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5824 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5825 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5826 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5827 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5828 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5829 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5830 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5831 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5832 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5833 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5834 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5835 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5836 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5837 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5838 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5839 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5840 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5841 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5842 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5843 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5845 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5846 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5847 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5848 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5849 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5851 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5852 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5853 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5854 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5855 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5856 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5857 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5858 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5859 	  0 },
5860 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5861 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5862 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5863 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5864 	  0 },
5865 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5866 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5867 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5868 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5869 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5870 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5871 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5872 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5873 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5874 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5875 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5876 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5877 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5878 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5879 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5880 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5881 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5882 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5883 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5884 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5885 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5886 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5887 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5888 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5889 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5890 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5891 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5892 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5893 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5894 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5895 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5896 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5897 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5898 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5899 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5900 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5901 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5902 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5903 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5904 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5905 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5906 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5907 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5908 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5909 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5910 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5911 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5912 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5913 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5914 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5915 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5916 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5917 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5918 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5919 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5920 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5921 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5922 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5923 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5924 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5925 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5926 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5927 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5928 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5929 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5930 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5931 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5932 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5933 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5934 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5935 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5936 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5937 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5938 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5939 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5940 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5941 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5942 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5943 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5944 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5945 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5946 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5947 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5948 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5949 	  0 },
5950 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5951 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5952 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5953 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5954 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5955 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5956 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5957 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5958 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5959 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5960 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5961 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5962 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5963 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5964 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5965 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5966 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5967 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5968 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5969 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5970 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5971 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5972 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5973 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5974 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5975 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5976 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5977 	  0 },
5978 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5979 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5980 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5981 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5982 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5983 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5984 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5985 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5986 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5987 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5988 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5989 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5990 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5991 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5992 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5993 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5994 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5995 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5996 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5997 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5998 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5999 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6000 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6001 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6002 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6003 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6004 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6005 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6006 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6008 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6009 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6010 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6011 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6012 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6013 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6014 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6015 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6016 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6017 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6018 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6019 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6020 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6021 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6022 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6023 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6024 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6025 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6027 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6028 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6029 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6030 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6031 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6033 };
6034 
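/*
 * Inject a RAS error into a GFX sub-block through the PSP RAS TA.
 * Only supported on Vega20.  The requested sub-block and error type are
 * validated against the ras_gfx_subblocks table before the request is
 * handed off to psp_ras_trigger_error().
 */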
6035 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6036 				     void *inject_if)
6037 {
6038 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6039 	int ret;
6040 	struct ta_ras_trigger_error_input block_info = { 0 };
6041 
6042 	if (adev->asic_type != CHIP_VEGA20)
6043 		return -EINVAL;
6044 
6045 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6046 		return -EINVAL;
6047 
6048 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6049 		return -EPERM;
6050 
6051 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6052 	      info->head.type)) {
6053 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6054 			ras_gfx_subblocks[info->head.sub_block_index].name,
6055 			info->head.type);
6056 		return -EPERM;
6057 	}
6058 
6059 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6060 	      info->head.type)) {
6061 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6062 			ras_gfx_subblocks[info->head.sub_block_index].name,
6063 			info->head.type);
6064 		return -EPERM;
6065 	}
6066 
6067 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6068 	block_info.sub_block_index =
6069 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6070 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6071 	block_info.address = info->address;
6072 	block_info.value = info->value;
6073 
6074 	mutex_lock(&adev->grbm_idx_mutex);
6075 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6076 	mutex_unlock(&adev->grbm_idx_mutex);
6077 
6078 	return ret;
6079 }
6080 
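/*
 * Walk the gfx_ras_edc_regs table and read every EDC counter register,
 * iterating over shader engines and instances for registers that are
 * per-SE/per-instance.  Correctable (SEC/SED) hits are accumulated into
 * ce_count and uncorrectable (DED) hits into ue_count.  Vega20 only.
 */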
6081 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6082 					  void *ras_error_status)
6083 {
6084 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6085 	uint32_t sec_count, ded_count;
6086 	uint32_t i;
6087 	uint32_t reg_value;
6088 	uint32_t se_id, instance_id;
6089 
6090 	if (adev->asic_type != CHIP_VEGA20)
6091 		return -EINVAL;
6092 
6093 	err_data->ue_count = 0;
6094 	err_data->ce_count = 0;
6095 
6096 	mutex_lock(&adev->grbm_idx_mutex);
6097 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6098 		for (instance_id = 0; instance_id < 256; instance_id++) {
6099 			for (i = 0;
6100 			     i < ARRAY_SIZE(gfx_ras_edc_regs);
6101 			     i++) {
6102 				if (se_id != 0 &&
6103 				    !gfx_ras_edc_regs[i].per_se_instance)
6104 					continue;
6105 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6106 					continue;
6107 
6108 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6109 						      instance_id);
6110 
6111 				reg_value = RREG32(
6112 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6113 							[gfx_ras_edc_regs[i].inst]
6114 							[gfx_ras_edc_regs[i].seg] +
6115 					gfx_ras_edc_regs[i].reg_offset);
6116 				sec_count = reg_value &
6117 					    gfx_ras_edc_regs[i].sec_count_mask;
6118 				ded_count = reg_value &
6119 					    gfx_ras_edc_regs[i].ded_count_mask;
6120 				if (sec_count) {
6121 					DRM_INFO(
6122 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6123 						se_id, instance_id,
6124 						gfx_ras_edc_regs[i].name,
6125 						sec_count);
6126 					err_data->ce_count++;
6127 				}
6128 
6129 				if (ded_count) {
6130 					DRM_INFO(
6131 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6132 						se_id, instance_id,
6133 						gfx_ras_edc_regs[i].name,
6134 						ded_count);
6135 					err_data->ue_count++;
6136 				}
6137 			}
6138 		}
6139 	}
6140 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6141 	mutex_unlock(&adev->grbm_idx_mutex);
6142 
6143 	return 0;
6144 }
6145 
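/*
 * CP ECC error interrupt handler: forward the IV entry to the common
 * RAS interrupt dispatcher if a GFX RAS context has been registered.
 */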
6146 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6147 				  struct amdgpu_irq_src *source,
6148 				  struct amdgpu_iv_entry *entry)
6149 {
6150 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6151 	struct ras_dispatch_if ih_data = {
6152 		.entry = entry,
6153 	};
6154 
6155 	if (!ras_if)
6156 		return 0;
6157 
6158 	ih_data.head = *ras_if;
6159 
6160 	DRM_ERROR("CP ECC ERROR IRQ\n");
6161 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6162 	return 0;
6163 }
6164 
6165 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6166 	.name = "gfx_v9_0",
6167 	.early_init = gfx_v9_0_early_init,
6168 	.late_init = gfx_v9_0_late_init,
6169 	.sw_init = gfx_v9_0_sw_init,
6170 	.sw_fini = gfx_v9_0_sw_fini,
6171 	.hw_init = gfx_v9_0_hw_init,
6172 	.hw_fini = gfx_v9_0_hw_fini,
6173 	.suspend = gfx_v9_0_suspend,
6174 	.resume = gfx_v9_0_resume,
6175 	.is_idle = gfx_v9_0_is_idle,
6176 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6177 	.soft_reset = gfx_v9_0_soft_reset,
6178 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6179 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6180 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6181 };
6182 
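/*
 * Ring function tables for the GFX, compute and KIQ rings follow.
 * emit_frame_size and emit_ib_size are worst-case sizes in dwords;
 * the per-packet breakdown is annotated inline.
 */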
6183 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6184 	.type = AMDGPU_RING_TYPE_GFX,
6185 	.align_mask = 0xff,
6186 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6187 	.support_64bit_ptrs = true,
6188 	.vmhub = AMDGPU_GFXHUB_0,
6189 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6190 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6191 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6192 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6193 		5 +  /* COND_EXEC */
6194 		7 +  /* PIPELINE_SYNC */
6195 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6196 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6197 		2 + /* VM_FLUSH */
6198 		8 +  /* FENCE for VM_FLUSH */
6199 		20 + /* GDS switch */
6200 		4 + /* double SWITCH_BUFFER,
6201 		       the first COND_EXEC jumps to the place just
6202 		       prior to this double SWITCH_BUFFER */
6203 		5 + /* COND_EXEC */
6204 		7 +  /* HDP_flush */
6205 		4 +  /* VGT_flush */
6206 		14 + /* CE_META */
6207 		31 + /* DE_META */
6208 		3 + /* CNTX_CTRL */
6209 		5 + /* HDP_INVL */
6210 		8 + 8 + /* FENCE x2 */
6211 		2, /* SWITCH_BUFFER */
6212 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6213 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6214 	.emit_fence = gfx_v9_0_ring_emit_fence,
6215 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6216 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6217 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6218 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6219 	.test_ring = gfx_v9_0_ring_test_ring,
6220 	.test_ib = gfx_v9_0_ring_test_ib,
6221 	.insert_nop = amdgpu_ring_insert_nop,
6222 	.pad_ib = amdgpu_ring_generic_pad_ib,
6223 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6224 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6225 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6226 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6227 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6228 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6229 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6230 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6231 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6232 };
6233 
6234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6235 	.type = AMDGPU_RING_TYPE_COMPUTE,
6236 	.align_mask = 0xff,
6237 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6238 	.support_64bit_ptrs = true,
6239 	.vmhub = AMDGPU_GFXHUB_0,
6240 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6241 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6242 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6243 	.emit_frame_size =
6244 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6245 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6246 		5 + /* hdp invalidate */
6247 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6248 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6249 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6250 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6251 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6252 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6253 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6254 	.emit_fence = gfx_v9_0_ring_emit_fence,
6255 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6256 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6257 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6258 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6259 	.test_ring = gfx_v9_0_ring_test_ring,
6260 	.test_ib = gfx_v9_0_ring_test_ib,
6261 	.insert_nop = amdgpu_ring_insert_nop,
6262 	.pad_ib = amdgpu_ring_generic_pad_ib,
6263 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6264 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6265 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6266 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6267 };
6268 
6269 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6270 	.type = AMDGPU_RING_TYPE_KIQ,
6271 	.align_mask = 0xff,
6272 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6273 	.support_64bit_ptrs = true,
6274 	.vmhub = AMDGPU_GFXHUB_0,
6275 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6276 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6277 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6278 	.emit_frame_size =
6279 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6280 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6281 		5 + /* hdp invalidate */
6282 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6283 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6284 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6285 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6286 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6287 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6288 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6289 	.test_ring = gfx_v9_0_ring_test_ring,
6290 	.insert_nop = amdgpu_ring_insert_nop,
6291 	.pad_ib = amdgpu_ring_generic_pad_ib,
6292 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6293 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6294 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6295 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6296 };
6297 
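/* Hook up the ring function tables for the KIQ, GFX and compute rings. */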
6298 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6299 {
6300 	int i;
6301 
6302 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6303 
6304 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6305 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6306 
6307 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6308 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6309 }
6310 
6311 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6312 	.set = gfx_v9_0_set_eop_interrupt_state,
6313 	.process = gfx_v9_0_eop_irq,
6314 };
6315 
6316 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6317 	.set = gfx_v9_0_set_priv_reg_fault_state,
6318 	.process = gfx_v9_0_priv_reg_irq,
6319 };
6320 
6321 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6322 	.set = gfx_v9_0_set_priv_inst_fault_state,
6323 	.process = gfx_v9_0_priv_inst_irq,
6324 };
6325 
6326 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6327 	.set = gfx_v9_0_set_cp_ecc_error_state,
6328 	.process = gfx_v9_0_cp_ecc_error_irq,
6329 };
6330 
6331 
6332 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6333 {
6334 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6335 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6336 
6337 	adev->gfx.priv_reg_irq.num_types = 1;
6338 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6339 
6340 	adev->gfx.priv_inst_irq.num_types = 1;
6341 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6342 
6343 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6344 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6345 }
6346 
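/* Every supported gfx v9 ASIC uses the common gfx_v9_0_rlc_funcs table. */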
6347 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6348 {
6349 	switch (adev->asic_type) {
6350 	case CHIP_VEGA10:
6351 	case CHIP_VEGA12:
6352 	case CHIP_VEGA20:
6353 	case CHIP_RAVEN:
6354 	case CHIP_ARCTURUS:
6355 	case CHIP_RENOIR:
6356 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6357 		break;
6358 	default:
6359 		break;
6360 	}
6361 }
6362 
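/*
 * Per-ASIC GDS defaults: total GDS size, the highest compute wave id
 * allowed to use GDS, and fixed GWS/OA sizes.
 */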
6363 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6364 {
6365 	/* init asic gds info */
6366 	switch (adev->asic_type) {
6367 	case CHIP_VEGA10:
6368 	case CHIP_VEGA12:
6369 	case CHIP_VEGA20:
6370 		adev->gds.gds_size = 0x10000;
6371 		break;
6372 	case CHIP_RAVEN:
6373 	case CHIP_ARCTURUS:
6374 		adev->gds.gds_size = 0x1000;
6375 		break;
6376 	default:
6377 		adev->gds.gds_size = 0x10000;
6378 		break;
6379 	}
6380 
6381 	switch (adev->asic_type) {
6382 	case CHIP_VEGA10:
6383 	case CHIP_VEGA20:
6384 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6385 		break;
6386 	case CHIP_VEGA12:
6387 		adev->gds.gds_compute_max_wave_id = 0x27f;
6388 		break;
6389 	case CHIP_RAVEN:
6390 		if (adev->rev_id >= 0x8)
6391 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6392 		else
6393 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6394 		break;
6395 	case CHIP_ARCTURUS:
6396 		adev->gds.gds_compute_max_wave_id = 0xfff;
6397 		break;
6398 	default:
6399 		/* this really depends on the chip */
6400 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6401 		break;
6402 	}
6403 
6404 	adev->gds.gws_size = 64;
6405 	adev->gds.oa_size = 16;
6406 }
6407 
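/*
 * Mark user-requested CUs inactive by writing the bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */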
6408 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6409 						 u32 bitmap)
6410 {
6411 	u32 data;
6412 
6413 	if (!bitmap)
6414 		return;
6415 
6416 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6417 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6418 
6419 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6420 }
6421 
6422 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6423 {
6424 	u32 data, mask;
6425 
6426 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6427 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6428 
6429 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6430 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6431 
6432 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6433 
6434 	return (~data) & mask;
6435 }
6436 
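/*
 * Build the active-CU bitmap, the always-on CU mask and the total
 * active CU count for the whole chip.  The per-SE/SH results are packed
 * into the 4x4 cu_info bitmaps; see the Arcturus remapping note in the
 * loop body.
 */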
6437 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6438 				 struct amdgpu_cu_info *cu_info)
6439 {
6440 	int i, j, k, counter, active_cu_number = 0;
6441 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6442 	unsigned disable_masks[4 * 4];
6443 
6444 	if (!adev || !cu_info)
6445 		return -EINVAL;
6446 
6447 	/*
6448 	 * The limit of 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6449 	 */
6450 	if (adev->gfx.config.max_shader_engines *
6451 		adev->gfx.config.max_sh_per_se > 16)
6452 		return -EINVAL;
6453 
6454 	amdgpu_gfx_parse_disable_cu(disable_masks,
6455 				    adev->gfx.config.max_shader_engines,
6456 				    adev->gfx.config.max_sh_per_se);
6457 
6458 	mutex_lock(&adev->grbm_idx_mutex);
6459 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6460 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6461 			mask = 1;
6462 			ao_bitmap = 0;
6463 			counter = 0;
6464 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6465 			gfx_v9_0_set_user_cu_inactive_bitmap(
6466 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6467 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6468 
6469 			/*
6470 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6471 			 * is a 4x4 array, which suits the Vega ASICs with their
6472 			 * 4*2 SE/SH layout.
6473 			 * For Arcturus, however, the SE/SH layout changes to 8*1.
6474 			 * To minimize the impact, we map it onto the current
6475 			 * bitmap array as below:
6476 			 *    SE4,SH0 --> bitmap[0][1]
6477 			 *    SE5,SH0 --> bitmap[1][1]
6478 			 *    SE6,SH0 --> bitmap[2][1]
6479 			 *    SE7,SH0 --> bitmap[3][1]
6480 			 */
6481 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6482 
6483 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6484 				if (bitmap & mask) {
6485 					if (counter < adev->gfx.config.max_cu_per_sh)
6486 						ao_bitmap |= mask;
6487 					counter++;
6488 				}
6489 				mask <<= 1;
6490 			}
6491 			active_cu_number += counter;
6492 			if (i < 2 && j < 2)
6493 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6494 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6495 		}
6496 	}
6497 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6498 	mutex_unlock(&adev->grbm_idx_mutex);
6499 
6500 	cu_info->number = active_cu_number;
6501 	cu_info->ao_cu_mask = ao_cu_mask;
6502 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6503 
6504 	return 0;
6505 }
6506 
6507 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6508 {
6509 	.type = AMD_IP_BLOCK_TYPE_GFX,
6510 	.major = 9,
6511 	.minor = 0,
6512 	.rev = 0,
6513 	.funcs = &gfx_v9_0_ip_funcs,
6514 };
6515