xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 5a158981)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
/* Driver-local GFX9 constants; their users are outside this section. */
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
/*
 * PWR_MISC_CNTL_STATUS register, defined locally: register offset, base
 * index, and shift/mask pairs for its two fields. Per the masks below,
 * PWR_GFX_RLC_CGPG_EN is bit 0 and PWR_GFXOFF_STATUS is bits 2:1.
 */
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
/* GCEA_PROBE_MAP register offset and base index, defined locally. */
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
/*
 * Firmware image names declared by this file, grouped by ASIC name prefix.
 *
 * Most groups list six images (ce, pfp, me, mec, mec2, rlc). Exceptions
 * visible below: arcturus lists only mec, mec2 and rlc; the picasso group
 * adds picasso_rlc_am4.bin and the raven2 group adds raven_kicker_rlc.bin
 * as extra RLC variants.
 * NOTE(review): the code that picks a file name at load time is outside this
 * section; keep this list in sync with it.
 */
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
/*
 * TCP_CHAN_STEER_0..5 register offsets and base indices carrying an _ARCT
 * suffix. Note the offsets are not contiguous: 0x0b03, 0x0b04, then
 * 0x0b09..0x0b0c.
 * NOTE(review): the suffix presumably marks these as the Arcturus layout —
 * confirm against the golden-settings table that uses them.
 */
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
/*
 * Descriptor for one named register field pair used by the GFX RAS code.
 *
 * name:            human-readable label for the entry
 * hwip, inst, seg: register location selectors; together with reg_offset
 *                  they identify the register (their exact interpretation is
 *                  defined by the users of this struct, outside this section)
 * reg_offset:      register offset
 * sec_count_mask / sec_count_shift: mask and shift of the SEC count field
 * ded_count_mask / ded_count_shift: mask and shift of the DED count field
 *
 * NOTE(review): SEC/DED presumably mean single-error-corrected and
 * double-error-detected counters — confirm against the EDC counter code.
 */
134 struct ras_gfx_subblock_reg {
135 	const char *name;
136 	uint32_t hwip;
137 	uint32_t inst;
138 	uint32_t seg;
139 	uint32_t reg_offset;
140 	uint32_t sec_count_mask;
141 	uint32_t sec_count_shift;
142 	uint32_t ded_count_mask;
143 	uint32_t ded_count_shift;
144 };
145 
/*
 * GFX RAS sub-block indices (TA_RAS_BLOCK__* namespace).
 *
 * Values are assigned sequentially starting at 0. Multi-member groups are
 * bracketed by *_INDEX_START / *_INDEX_END aliases: START shares its value
 * with the group's first member and END with its last, so the aliases do not
 * consume extra values. SQC, TCC and EA are further divided into numbered
 * sub-ranges (INDEXn_START / INDEXn_END). Single-member groups (SPI, TCI and
 * the UTC entries) have no aliases. TA_RAS_BLOCK__GFX_MAX follows the last
 * sub-block and therefore equals the number of sub-blocks.
 *
 * NOTE(review): the numbering presumably has to match the RAS TA firmware
 * interface — confirm before reordering or inserting entries.
 */
146 enum ta_ras_gfx_subblock {
147 	/* CPC */
148 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
149 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
150 	TA_RAS_BLOCK__GFX_CPC_UCODE,
151 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
152 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
153 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
155 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
156 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
157 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
158 	/* CPF */
159 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
160 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
161 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
162 	TA_RAS_BLOCK__GFX_CPF_TAG,
163 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
164 	/* CPG */
165 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
166 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
167 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
168 	TA_RAS_BLOCK__GFX_CPG_TAG,
169 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
170 	/* GDS */
171 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
172 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
173 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
174 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
175 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
178 	/* SPI */
179 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
180 	/* SQ */
181 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
182 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
183 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
184 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
185 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
186 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
187 	/* SQC (3 ranges) */
188 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
189 	/* SQC range 0 */
190 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
193 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
194 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
200 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	/* SQC range 1 */
202 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
203 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
204 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
206 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
208 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
209 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
214 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	/* SQC range 2 */
216 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
217 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
218 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
220 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
222 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
223 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
227 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
228 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
230 	/* TA */
231 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
232 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
233 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
234 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
235 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
237 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
238 	/* TCA */
239 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
240 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
241 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
242 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
243 	/* TCC (5 sub-ranges) */
244 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
245 	/* TCC range 0 */
246 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
248 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
253 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
254 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
255 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
256 	/* TCC range 1 */
257 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
258 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
259 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
261 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	/* TCC range 2 */
263 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
264 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
265 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
266 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
267 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
268 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
269 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
270 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
271 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
273 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	/* TCC range 3 */
275 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
276 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
277 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
278 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
279 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	/* TCC range 4 */
281 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
282 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
283 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
285 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
286 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
288 	/* TCI */
289 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
290 	/* TCP */
291 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
292 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
293 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
294 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
295 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
296 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
297 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
298 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
299 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
300 	/* TD */
301 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
302 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
303 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
304 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
305 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
306 	/* EA (3 sub-ranges) */
307 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
308 	/* EA range 0 */
309 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
311 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
313 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
314 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
316 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
318 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
319 	/* EA range 1 */
320 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
321 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
322 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
323 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
324 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
325 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
326 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
327 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
328 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
329 	/* EA range 2 */
330 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
331 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
332 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
335 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
336 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
337 	/* UTC VM L2 bank */
338 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
339 	/* UTC VM walker */
340 	TA_RAS_BLOCK__UTC_VML2_WALKER,
341 	/* UTC ATC L2 2MB cache */
342 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
343 	/* UTC ATC L2 4KB cache */
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
345 	TA_RAS_BLOCK__GFX_MAX
346 };
347 
/*
 * Static description of one GFX RAS sub-block, as stored in
 * ras_gfx_subblocks[].
 *
 * name:                    sub-block name. AMDGPU_RAS_SUB_BLOCK fills it from
 *                          a stringified identifier, i.e. a string literal,
 *                          so it is a read-only "const char *" (matching
 *                          struct ras_gfx_subblock_reg) rather than a
 *                          writable "unsigned char *".
 * ta_subblock:             matching TA_RAS_BLOCK__* value
 * hw_supported_error_type: bit mask assembled from the first four flag
 *                          arguments of AMDGPU_RAS_SUB_BLOCK
 * sw_supported_error_type: bit mask assembled from the last four flag
 *                          arguments of AMDGPU_RAS_SUB_BLOCK
 */
struct ras_gfx_subblock {
	const char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
354 
/*
 * AMDGPU_RAS_SUB_BLOCK - build one designated entry of ras_gfx_subblocks[].
 *
 * subblock:   identifier suffix; the entry is stored at array index
 *             AMDGPU_RAS_BLOCK__<subblock>, named "<subblock>", and tagged
 *             with TA_RAS_BLOCK__<subblock>.
 * a, b, c, d: flags placed in bits 0, 1, 2 and 3 of hw_supported_error_type.
 * e, f, g, h: flags placed in bits 1, 3, 0 and 2 of sw_supported_error_type
 *             (note the order differs from the hw mask).
 *
 * Members are initialised by name so the entry stays correct if the fields
 * of struct ras_gfx_subblock are ever reordered or extended.
 * NOTE(review): the bit positions presumably correspond to the
 * AMDGPU_RAS_ERROR__* flags — confirm against amdgpu_ras.h.
 */
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		.name = #subblock, \
		.ta_subblock = TA_RAS_BLOCK__##subblock, \
		.hw_supported_error_type = \
			((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		.sw_supported_error_type = \
			((g) | ((e) << 1) | ((h) << 2) | ((f) << 3)), \
	}
362 
/*
 * RAS capability table for the GFX sub-blocks.
 *
 * Each entry is placed by designated array index AMDGPU_RAS_BLOCK__<subblock>
 * (see AMDGPU_RAS_SUB_BLOCK), so the source order below does not determine
 * an entry's position. The eight numeric arguments after the name are 0/1
 * flags: the first four become bits 0..3 of hw_supported_error_type, and the
 * last four (e, f, g, h) become bits 1, 3, 0 and 2 of
 * sw_supported_error_type respectively.
 */
363 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
381 			     0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
392 			     0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
394 			     0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
396 			     0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
400 			     0, 0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
404 			     1),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
406 			     0, 0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
418 			     0, 0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
424 			     0, 0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
436 			     0, 0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
448 			     1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
454 			     0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
469 			     0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
472 			     0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
474 			     0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
476 			     0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
511 };
512 
/*
 * Golden register settings table "gc_9_0".
 *
 * Each entry is built by SOC15_REG_GOLDEN_VALUE (defined outside this
 * section) from an IP block, an instance number, a register name and two
 * 32-bit constants.
 * NOTE(review): the two constants are presumably an AND mask followed by the
 * OR value — confirm against the macro and the helper that applies the table.
 */
513 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
514 {
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
535 };
536 
/*
 * Golden register settings table "gc_9_0_vg10"; entries are
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, value, value) tuples.
 * NOTE(review): the _vg10 suffix presumably marks Vega10-specific overrides
 * applied in addition to golden_settings_gc_9_0 — confirm against the init
 * code that selects this table.
 */
537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
538 {
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
557 };
558 
/*
 * Golden register settings table "gc_9_0_vg20"; entries are
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, value, value) tuples.
 * NOTE(review): the _vg20 suffix presumably marks Vega20-specific overrides —
 * confirm against the init code that selects this table.
 */
559 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
560 {
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
572 };
573 
/*
 * Golden register settings table "gc_9_1"; entries are
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, value, value) tuples.
 * NOTE(review): presumably the common baseline for the GC 9.1 parts, with
 * the _rv1/_rv2 tables layered on top — confirm against the init code.
 */
574 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
575 {
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
600 };
601 
/*
 * Golden register settings table "gc_9_1_rv1"; entries are
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, value, value) tuples.
 * NOTE(review): the _rv1 suffix presumably marks Raven(1)-specific
 * overrides — confirm against the init code that selects this table.
 */
602 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
603 {
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
611 };
612 
/*
 * Golden register settings table "gc_9_1_rv2"; entries are
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, value, value) tuples.
 * NOTE(review): the _rv2 suffix presumably marks Raven2-specific overrides —
 * confirm against the init code that selects this table.
 */
613 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
614 {
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
634 };
635 
/*
 * Golden register settings for Renoir.
 *
 * gfx_v9_0_init_golden_registers() programs only this table for
 * CHIP_RENOIR and returns immediately afterwards, so the shared
 * golden_settings_gc_9_x_common table is not applied on that ASIC.
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};
651 
/*
 * Golden register settings shared between GFX9 ASICs.
 *
 * gfx_v9_0_init_golden_registers() programs this table last, after the
 * per-ASIC tables, for every ASIC type except CHIP_ARCTURUS (explicitly
 * excluded) and CHIP_RENOIR (which returns before reaching it).
 */
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
658 
/*
 * Base golden register settings for GC 9.2.1.
 *
 * gfx_v9_0_init_golden_registers() programs this table first for
 * CHIP_VEGA12, followed by golden_settings_gc_9_2_1_vg12.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
678 
/*
 * Vega12-specific golden register settings.
 *
 * gfx_v9_0_init_golden_registers() programs this table for CHIP_VEGA12
 * right after golden_settings_gc_9_2_1.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
695 
/*
 * Golden register settings for Arcturus.
 *
 * This is the only table gfx_v9_0_init_golden_registers() programs for
 * CHIP_ARCTURUS; the shared golden_settings_gc_9_x_common table is skipped
 * on that ASIC.
 */
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};
709 
/*
 * Distance of each RLC_SRM_INDEX_CNTL_ADDR_n register (n = 0..7) from
 * RLC_SRM_INDEX_CNTL_ADDR_0. Entry n is the value to add to the offset of
 * register 0 to reach register n; entry 0 is therefore always zero.
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
721 
/*
 * Distance of each RLC_SRM_INDEX_CNTL_DATA_n register (n = 0..7) from
 * RLC_SRM_INDEX_CNTL_DATA_0. Entry n is the value to add to the offset of
 * register 0 to reach register n; entry 0 is therefore always zero.
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
733 
/*
 * Per-ASIC golden values for the GB_ADDR_CONFIG register. The Vega12,
 * Raven and Raven2 constants match the mmGB_ADDR_CONFIG entries of
 * golden_settings_gc_9_2_1_vg12, golden_settings_gc_9_1_rv1 and
 * golden_settings_gc_9_1_rv2 respectively.
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
738 
/*
 * Forward declarations of file-local functions whose definitions appear
 * elsewhere in this file.
 */
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
753 
754 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
755 {
756 	switch (adev->asic_type) {
757 	case CHIP_VEGA10:
758 		soc15_program_register_sequence(adev,
759 						golden_settings_gc_9_0,
760 						ARRAY_SIZE(golden_settings_gc_9_0));
761 		soc15_program_register_sequence(adev,
762 						golden_settings_gc_9_0_vg10,
763 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
764 		break;
765 	case CHIP_VEGA12:
766 		soc15_program_register_sequence(adev,
767 						golden_settings_gc_9_2_1,
768 						ARRAY_SIZE(golden_settings_gc_9_2_1));
769 		soc15_program_register_sequence(adev,
770 						golden_settings_gc_9_2_1_vg12,
771 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
772 		break;
773 	case CHIP_VEGA20:
774 		soc15_program_register_sequence(adev,
775 						golden_settings_gc_9_0,
776 						ARRAY_SIZE(golden_settings_gc_9_0));
777 		soc15_program_register_sequence(adev,
778 						golden_settings_gc_9_0_vg20,
779 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
780 		break;
781 	case CHIP_ARCTURUS:
782 		soc15_program_register_sequence(adev,
783 						golden_settings_gc_9_4_1_arct,
784 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
785 		break;
786 	case CHIP_RAVEN:
787 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
788 						ARRAY_SIZE(golden_settings_gc_9_1));
789 		if (adev->rev_id >= 8)
790 			soc15_program_register_sequence(adev,
791 							golden_settings_gc_9_1_rv2,
792 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
793 		else
794 			soc15_program_register_sequence(adev,
795 							golden_settings_gc_9_1_rv1,
796 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
797 		break;
798 	 case CHIP_RENOIR:
799 		soc15_program_register_sequence(adev,
800 						golden_settings_gc_9_1_rn,
801 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
802 		return; /* for renoir, don't need common goldensetting */
803 	default:
804 		break;
805 	}
806 
807 	if (adev->asic_type != CHIP_ARCTURUS)
808 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
809 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
810 }
811 
812 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
813 {
814 	adev->gfx.scratch.num_reg = 8;
815 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
816 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
817 }
818 
819 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
820 				       bool wc, uint32_t reg, uint32_t val)
821 {
822 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
823 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
824 				WRITE_DATA_DST_SEL(0) |
825 				(wc ? WR_CONFIRM : 0));
826 	amdgpu_ring_write(ring, reg);
827 	amdgpu_ring_write(ring, 0);
828 	amdgpu_ring_write(ring, val);
829 }
830 
831 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
832 				  int mem_space, int opt, uint32_t addr0,
833 				  uint32_t addr1, uint32_t ref, uint32_t mask,
834 				  uint32_t inv)
835 {
836 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
837 	amdgpu_ring_write(ring,
838 				 /* memory (1) or register (0) */
839 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
840 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
841 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
842 				 WAIT_REG_MEM_ENGINE(eng_sel)));
843 
844 	if (mem_space)
845 		BUG_ON(addr0 & 0x3); /* Dword align */
846 	amdgpu_ring_write(ring, addr0);
847 	amdgpu_ring_write(ring, addr1);
848 	amdgpu_ring_write(ring, ref);
849 	amdgpu_ring_write(ring, mask);
850 	amdgpu_ring_write(ring, inv); /* poll interval */
851 }
852 
853 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
854 {
855 	struct amdgpu_device *adev = ring->adev;
856 	uint32_t scratch;
857 	uint32_t tmp = 0;
858 	unsigned i;
859 	int r;
860 
861 	r = amdgpu_gfx_scratch_get(adev, &scratch);
862 	if (r)
863 		return r;
864 
865 	WREG32(scratch, 0xCAFEDEAD);
866 	r = amdgpu_ring_alloc(ring, 3);
867 	if (r)
868 		goto error_free_scratch;
869 
870 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
871 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
872 	amdgpu_ring_write(ring, 0xDEADBEEF);
873 	amdgpu_ring_commit(ring);
874 
875 	for (i = 0; i < adev->usec_timeout; i++) {
876 		tmp = RREG32(scratch);
877 		if (tmp == 0xDEADBEEF)
878 			break;
879 		udelay(1);
880 	}
881 
882 	if (i >= adev->usec_timeout)
883 		r = -ETIMEDOUT;
884 
885 error_free_scratch:
886 	amdgpu_gfx_scratch_free(adev, scratch);
887 	return r;
888 }
889 
890 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
891 {
892 	struct amdgpu_device *adev = ring->adev;
893 	struct amdgpu_ib ib;
894 	struct dma_fence *f = NULL;
895 
896 	unsigned index;
897 	uint64_t gpu_addr;
898 	uint32_t tmp;
899 	long r;
900 
901 	r = amdgpu_device_wb_get(adev, &index);
902 	if (r)
903 		return r;
904 
905 	gpu_addr = adev->wb.gpu_addr + (index * 4);
906 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
907 	memset(&ib, 0, sizeof(ib));
908 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
909 	if (r)
910 		goto err1;
911 
912 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
913 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
914 	ib.ptr[2] = lower_32_bits(gpu_addr);
915 	ib.ptr[3] = upper_32_bits(gpu_addr);
916 	ib.ptr[4] = 0xDEADBEEF;
917 	ib.length_dw = 5;
918 
919 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
920 	if (r)
921 		goto err2;
922 
923 	r = dma_fence_wait_timeout(f, false, timeout);
924 	if (r == 0) {
925 		r = -ETIMEDOUT;
926 		goto err2;
927 	} else if (r < 0) {
928 		goto err2;
929 	}
930 
931 	tmp = adev->wb.wb[index];
932 	if (tmp == 0xDEADBEEF)
933 		r = 0;
934 	else
935 		r = -EINVAL;
936 
937 err2:
938 	amdgpu_ib_free(adev, &ib, NULL);
939 	dma_fence_put(f);
940 err1:
941 	amdgpu_device_wb_free(adev, index);
942 	return r;
943 }
944 
945 
946 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
947 {
948 	release_firmware(adev->gfx.pfp_fw);
949 	adev->gfx.pfp_fw = NULL;
950 	release_firmware(adev->gfx.me_fw);
951 	adev->gfx.me_fw = NULL;
952 	release_firmware(adev->gfx.ce_fw);
953 	adev->gfx.ce_fw = NULL;
954 	release_firmware(adev->gfx.rlc_fw);
955 	adev->gfx.rlc_fw = NULL;
956 	release_firmware(adev->gfx.mec_fw);
957 	adev->gfx.mec_fw = NULL;
958 	release_firmware(adev->gfx.mec2_fw);
959 	adev->gfx.mec2_fw = NULL;
960 
961 	kfree(adev->gfx.rlc.register_list_format);
962 }
963 
964 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
965 {
966 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
967 
968 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
969 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
970 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
971 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
972 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
973 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
974 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
975 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
976 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
977 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
978 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
979 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
980 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
981 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
982 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
983 }
984 
985 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
986 {
987 	adev->gfx.me_fw_write_wait = false;
988 	adev->gfx.mec_fw_write_wait = false;
989 
990 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
991 	    (adev->gfx.mec_feature_version < 46) ||
992 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
993 	    (adev->gfx.pfp_feature_version < 46))
994 		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
995 			      GRBM requires 1-cycle delay in cp firmware\n");
996 
997 	switch (adev->asic_type) {
998 	case CHIP_VEGA10:
999 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1000 		    (adev->gfx.me_feature_version >= 42) &&
1001 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1002 		    (adev->gfx.pfp_feature_version >= 42))
1003 			adev->gfx.me_fw_write_wait = true;
1004 
1005 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1006 		    (adev->gfx.mec_feature_version >= 42))
1007 			adev->gfx.mec_fw_write_wait = true;
1008 		break;
1009 	case CHIP_VEGA12:
1010 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1011 		    (adev->gfx.me_feature_version >= 44) &&
1012 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1013 		    (adev->gfx.pfp_feature_version >= 44))
1014 			adev->gfx.me_fw_write_wait = true;
1015 
1016 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1017 		    (adev->gfx.mec_feature_version >= 44))
1018 			adev->gfx.mec_fw_write_wait = true;
1019 		break;
1020 	case CHIP_VEGA20:
1021 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1022 		    (adev->gfx.me_feature_version >= 44) &&
1023 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1024 		    (adev->gfx.pfp_feature_version >= 44))
1025 			adev->gfx.me_fw_write_wait = true;
1026 
1027 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1028 		    (adev->gfx.mec_feature_version >= 44))
1029 			adev->gfx.mec_fw_write_wait = true;
1030 		break;
1031 	case CHIP_RAVEN:
1032 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1033 		    (adev->gfx.me_feature_version >= 42) &&
1034 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1035 		    (adev->gfx.pfp_feature_version >= 42))
1036 			adev->gfx.me_fw_write_wait = true;
1037 
1038 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1039 		    (adev->gfx.mec_feature_version >= 42))
1040 			adev->gfx.mec_fw_write_wait = true;
1041 		break;
1042 	default:
1043 		break;
1044 	}
1045 }
1046 
1047 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1048 {
1049 	switch (adev->asic_type) {
1050 	case CHIP_VEGA10:
1051 	case CHIP_VEGA12:
1052 	case CHIP_VEGA20:
1053 		break;
1054 	case CHIP_RAVEN:
1055 		if (!(adev->rev_id >= 0x8 ||
1056 		      adev->pdev->device == 0x15d8) &&
1057 		    (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
1058 		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1059 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1060 
1061 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1062 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1063 				AMD_PG_SUPPORT_CP |
1064 				AMD_PG_SUPPORT_RLC_SMU_HS;
1065 		break;
1066 	case CHIP_RENOIR:
1067 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1068 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1069 				AMD_PG_SUPPORT_CP |
1070 				AMD_PG_SUPPORT_RLC_SMU_HS;
1071 		break;
1072 	default:
1073 		break;
1074 	}
1075 }
1076 
1077 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1078 					  const char *chip_name)
1079 {
1080 	char fw_name[30];
1081 	int err;
1082 	struct amdgpu_firmware_info *info = NULL;
1083 	const struct common_firmware_header *header = NULL;
1084 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1085 
1086 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1087 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1088 	if (err)
1089 		goto out;
1090 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1091 	if (err)
1092 		goto out;
1093 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1094 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1095 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1096 
1097 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1098 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1099 	if (err)
1100 		goto out;
1101 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1102 	if (err)
1103 		goto out;
1104 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1105 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1106 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1107 
1108 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1109 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1110 	if (err)
1111 		goto out;
1112 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1113 	if (err)
1114 		goto out;
1115 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1116 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1117 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1118 
1119 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1120 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1121 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1122 		info->fw = adev->gfx.pfp_fw;
1123 		header = (const struct common_firmware_header *)info->fw->data;
1124 		adev->firmware.fw_size +=
1125 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1126 
1127 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1128 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1129 		info->fw = adev->gfx.me_fw;
1130 		header = (const struct common_firmware_header *)info->fw->data;
1131 		adev->firmware.fw_size +=
1132 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1133 
1134 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1135 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1136 		info->fw = adev->gfx.ce_fw;
1137 		header = (const struct common_firmware_header *)info->fw->data;
1138 		adev->firmware.fw_size +=
1139 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1140 	}
1141 
1142 out:
1143 	if (err) {
1144 		dev_err(adev->dev,
1145 			"gfx9: Failed to load firmware \"%s\"\n",
1146 			fw_name);
1147 		release_firmware(adev->gfx.pfp_fw);
1148 		adev->gfx.pfp_fw = NULL;
1149 		release_firmware(adev->gfx.me_fw);
1150 		adev->gfx.me_fw = NULL;
1151 		release_firmware(adev->gfx.ce_fw);
1152 		adev->gfx.ce_fw = NULL;
1153 	}
1154 	return err;
1155 }
1156 
1157 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1158 					  const char *chip_name)
1159 {
1160 	char fw_name[30];
1161 	int err;
1162 	struct amdgpu_firmware_info *info = NULL;
1163 	const struct common_firmware_header *header = NULL;
1164 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1165 	unsigned int *tmp = NULL;
1166 	unsigned int i = 0;
1167 	uint16_t version_major;
1168 	uint16_t version_minor;
1169 	uint32_t smu_version;
1170 
1171 	/*
1172 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1173 	 * instead of picasso_rlc.bin.
1174 	 * Judgment method:
1175 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1176 	 *          or revision >= 0xD8 && revision <= 0xDF
1177 	 * otherwise is PCO FP5
1178 	 */
1179 	if (!strcmp(chip_name, "picasso") &&
1180 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1181 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1182 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1183 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1184 		(smu_version >= 0x41e2b))
1185 		/**
1186 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1187 		*/
1188 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1189 	else
1190 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1191 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1192 	if (err)
1193 		goto out;
1194 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1195 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1196 
1197 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1198 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1199 	if (version_major == 2 && version_minor == 1)
1200 		adev->gfx.rlc.is_rlc_v2_1 = true;
1201 
1202 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1203 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1204 	adev->gfx.rlc.save_and_restore_offset =
1205 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1206 	adev->gfx.rlc.clear_state_descriptor_offset =
1207 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1208 	adev->gfx.rlc.avail_scratch_ram_locations =
1209 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1210 	adev->gfx.rlc.reg_restore_list_size =
1211 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1212 	adev->gfx.rlc.reg_list_format_start =
1213 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1214 	adev->gfx.rlc.reg_list_format_separate_start =
1215 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1216 	adev->gfx.rlc.starting_offsets_start =
1217 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1218 	adev->gfx.rlc.reg_list_format_size_bytes =
1219 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1220 	adev->gfx.rlc.reg_list_size_bytes =
1221 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1222 	adev->gfx.rlc.register_list_format =
1223 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1224 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1225 	if (!adev->gfx.rlc.register_list_format) {
1226 		err = -ENOMEM;
1227 		goto out;
1228 	}
1229 
1230 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1231 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1232 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1233 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1234 
1235 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1236 
1237 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1238 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1239 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1240 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1241 
1242 	if (adev->gfx.rlc.is_rlc_v2_1)
1243 		gfx_v9_0_init_rlc_ext_microcode(adev);
1244 
1245 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1246 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1247 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1248 		info->fw = adev->gfx.rlc_fw;
1249 		header = (const struct common_firmware_header *)info->fw->data;
1250 		adev->firmware.fw_size +=
1251 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1252 
1253 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1254 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1255 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1256 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1257 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1258 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1259 			info->fw = adev->gfx.rlc_fw;
1260 			adev->firmware.fw_size +=
1261 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1262 
1263 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1264 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1265 			info->fw = adev->gfx.rlc_fw;
1266 			adev->firmware.fw_size +=
1267 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1268 
1269 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1270 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1271 			info->fw = adev->gfx.rlc_fw;
1272 			adev->firmware.fw_size +=
1273 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1274 		}
1275 	}
1276 
1277 out:
1278 	if (err) {
1279 		dev_err(adev->dev,
1280 			"gfx9: Failed to load firmware \"%s\"\n",
1281 			fw_name);
1282 		release_firmware(adev->gfx.rlc_fw);
1283 		adev->gfx.rlc_fw = NULL;
1284 	}
1285 	return err;
1286 }
1287 
1288 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1289 					  const char *chip_name)
1290 {
1291 	char fw_name[30];
1292 	int err;
1293 	struct amdgpu_firmware_info *info = NULL;
1294 	const struct common_firmware_header *header = NULL;
1295 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1296 
1297 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1298 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1299 	if (err)
1300 		goto out;
1301 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1302 	if (err)
1303 		goto out;
1304 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1305 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1306 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1307 
1308 
1309 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1310 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1311 	if (!err) {
1312 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1313 		if (err)
1314 			goto out;
1315 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1316 		adev->gfx.mec2_fw->data;
1317 		adev->gfx.mec2_fw_version =
1318 		le32_to_cpu(cp_hdr->header.ucode_version);
1319 		adev->gfx.mec2_feature_version =
1320 		le32_to_cpu(cp_hdr->ucode_feature_version);
1321 	} else {
1322 		err = 0;
1323 		adev->gfx.mec2_fw = NULL;
1324 	}
1325 
1326 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1327 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1328 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1329 		info->fw = adev->gfx.mec_fw;
1330 		header = (const struct common_firmware_header *)info->fw->data;
1331 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1332 		adev->firmware.fw_size +=
1333 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1334 
1335 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1336 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1337 		info->fw = adev->gfx.mec_fw;
1338 		adev->firmware.fw_size +=
1339 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1340 
1341 		if (adev->gfx.mec2_fw) {
1342 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1343 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1344 			info->fw = adev->gfx.mec2_fw;
1345 			header = (const struct common_firmware_header *)info->fw->data;
1346 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1347 			adev->firmware.fw_size +=
1348 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1349 
1350 			/* TODO: Determine if MEC2 JT FW loading can be removed
1351 				 for all GFX V9 asic and above */
1352 			if (adev->asic_type != CHIP_ARCTURUS &&
1353 			    adev->asic_type != CHIP_RENOIR) {
1354 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1355 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1356 				info->fw = adev->gfx.mec2_fw;
1357 				adev->firmware.fw_size +=
1358 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1359 					PAGE_SIZE);
1360 			}
1361 		}
1362 	}
1363 
1364 out:
1365 	gfx_v9_0_check_if_need_gfxoff(adev);
1366 	gfx_v9_0_check_fw_write_wait(adev);
1367 	if (err) {
1368 		dev_err(adev->dev,
1369 			"gfx9: Failed to load firmware \"%s\"\n",
1370 			fw_name);
1371 		release_firmware(adev->gfx.mec_fw);
1372 		adev->gfx.mec_fw = NULL;
1373 		release_firmware(adev->gfx.mec2_fw);
1374 		adev->gfx.mec2_fw = NULL;
1375 	}
1376 	return err;
1377 }
1378 
1379 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1380 {
1381 	const char *chip_name;
1382 	int r;
1383 
1384 	DRM_DEBUG("\n");
1385 
1386 	switch (adev->asic_type) {
1387 	case CHIP_VEGA10:
1388 		chip_name = "vega10";
1389 		break;
1390 	case CHIP_VEGA12:
1391 		chip_name = "vega12";
1392 		break;
1393 	case CHIP_VEGA20:
1394 		chip_name = "vega20";
1395 		break;
1396 	case CHIP_RAVEN:
1397 		if (adev->rev_id >= 8)
1398 			chip_name = "raven2";
1399 		else if (adev->pdev->device == 0x15d8)
1400 			chip_name = "picasso";
1401 		else
1402 			chip_name = "raven";
1403 		break;
1404 	case CHIP_ARCTURUS:
1405 		chip_name = "arcturus";
1406 		break;
1407 	case CHIP_RENOIR:
1408 		chip_name = "renoir";
1409 		break;
1410 	default:
1411 		BUG();
1412 	}
1413 
1414 	/* No CPG in Arcturus */
1415 	if (adev->asic_type != CHIP_ARCTURUS) {
1416 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1417 		if (r)
1418 			return r;
1419 	}
1420 
1421 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1422 	if (r)
1423 		return r;
1424 
1425 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1426 	if (r)
1427 		return r;
1428 
1429 	return r;
1430 }
1431 
1432 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1433 {
1434 	u32 count = 0;
1435 	const struct cs_section_def *sect = NULL;
1436 	const struct cs_extent_def *ext = NULL;
1437 
1438 	/* begin clear state */
1439 	count += 2;
1440 	/* context control state */
1441 	count += 3;
1442 
1443 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1444 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1445 			if (sect->id == SECT_CONTEXT)
1446 				count += 2 + ext->reg_count;
1447 			else
1448 				return 0;
1449 		}
1450 	}
1451 
1452 	/* end clear state */
1453 	count += 2;
1454 	/* clear state */
1455 	count += 2;
1456 
1457 	return count;
1458 }
1459 
1460 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1461 				    volatile u32 *buffer)
1462 {
1463 	u32 count = 0, i;
1464 	const struct cs_section_def *sect = NULL;
1465 	const struct cs_extent_def *ext = NULL;
1466 
1467 	if (adev->gfx.rlc.cs_data == NULL)
1468 		return;
1469 	if (buffer == NULL)
1470 		return;
1471 
1472 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1473 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1474 
1475 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1476 	buffer[count++] = cpu_to_le32(0x80000000);
1477 	buffer[count++] = cpu_to_le32(0x80000000);
1478 
1479 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1480 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1481 			if (sect->id == SECT_CONTEXT) {
1482 				buffer[count++] =
1483 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1484 				buffer[count++] = cpu_to_le32(ext->reg_index -
1485 						PACKET3_SET_CONTEXT_REG_START);
1486 				for (i = 0; i < ext->reg_count; i++)
1487 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1488 			} else {
1489 				return;
1490 			}
1491 		}
1492 	}
1493 
1494 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1495 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1496 
1497 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1498 	buffer[count++] = cpu_to_le32(0);
1499 }
1500 
1501 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1502 {
1503 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1504 	uint32_t pg_always_on_cu_num = 2;
1505 	uint32_t always_on_cu_num;
1506 	uint32_t i, j, k;
1507 	uint32_t mask, cu_bitmap, counter;
1508 
1509 	if (adev->flags & AMD_IS_APU)
1510 		always_on_cu_num = 4;
1511 	else if (adev->asic_type == CHIP_VEGA12)
1512 		always_on_cu_num = 8;
1513 	else
1514 		always_on_cu_num = 12;
1515 
1516 	mutex_lock(&adev->grbm_idx_mutex);
1517 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1518 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1519 			mask = 1;
1520 			cu_bitmap = 0;
1521 			counter = 0;
1522 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1523 
1524 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1525 				if (cu_info->bitmap[i][j] & mask) {
1526 					if (counter == pg_always_on_cu_num)
1527 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1528 					if (counter < always_on_cu_num)
1529 						cu_bitmap |= mask;
1530 					else
1531 						break;
1532 					counter++;
1533 				}
1534 				mask <<= 1;
1535 			}
1536 
1537 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1538 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1539 		}
1540 	}
1541 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1542 	mutex_unlock(&adev->grbm_idx_mutex);
1543 }
1544 
1545 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1546 {
1547 	uint32_t data;
1548 
1549 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1550 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1551 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1552 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1553 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1554 
1555 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1556 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1557 
1558 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1559 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1560 
1561 	mutex_lock(&adev->grbm_idx_mutex);
1562 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1563 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1564 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1565 
1566 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1567 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1568 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1569 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1570 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1571 
1572 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1573 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1574 	data &= 0x0000FFFF;
1575 	data |= 0x00C00000;
1576 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1577 
1578 	/*
1579 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1580 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1581 	 */
1582 
1583 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1584 	 * but used for RLC_LB_CNTL configuration */
1585 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1586 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1587 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1588 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1589 	mutex_unlock(&adev->grbm_idx_mutex);
1590 
1591 	gfx_v9_0_init_always_on_cu_mask(adev);
1592 }
1593 
1594 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1595 {
1596 	uint32_t data;
1597 
1598 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1599 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1600 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1601 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1602 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1603 
1604 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1605 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1606 
1607 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1608 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1609 
1610 	mutex_lock(&adev->grbm_idx_mutex);
1611 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1612 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1613 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1614 
1615 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1616 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1617 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1618 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1619 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1620 
1621 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1622 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1623 	data &= 0x0000FFFF;
1624 	data |= 0x00C00000;
1625 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1626 
1627 	/*
1628 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1629 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1630 	 */
1631 
1632 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1633 	 * but used for RLC_LB_CNTL configuration */
1634 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1635 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1636 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1637 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1638 	mutex_unlock(&adev->grbm_idx_mutex);
1639 
1640 	gfx_v9_0_init_always_on_cu_mask(adev);
1641 }
1642 
1643 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1644 {
1645 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1646 }
1647 
/* Report the number of CP jump tables; fixed at 5, @adev is not consulted. */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	const int jump_table_count = 5;

	return jump_table_count;
}
1652 
1653 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1654 {
1655 	const struct cs_section_def *cs_data;
1656 	int r;
1657 
1658 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1659 
1660 	cs_data = adev->gfx.rlc.cs_data;
1661 
1662 	if (cs_data) {
1663 		/* init clear state block */
1664 		r = amdgpu_gfx_rlc_init_csb(adev);
1665 		if (r)
1666 			return r;
1667 	}
1668 
1669 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1670 		/* TODO: double check the cp_table_size for RV */
1671 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1672 		r = amdgpu_gfx_rlc_init_cpt(adev);
1673 		if (r)
1674 			return r;
1675 	}
1676 
1677 	switch (adev->asic_type) {
1678 	case CHIP_RAVEN:
1679 		gfx_v9_0_init_lbpw(adev);
1680 		break;
1681 	case CHIP_VEGA20:
1682 		gfx_v9_4_init_lbpw(adev);
1683 		break;
1684 	default:
1685 		break;
1686 	}
1687 
1688 	return 0;
1689 }
1690 
1691 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1692 {
1693 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1694 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1695 }
1696 
1697 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1698 {
1699 	int r;
1700 	u32 *hpd;
1701 	const __le32 *fw_data;
1702 	unsigned fw_size;
1703 	u32 *fw;
1704 	size_t mec_hpd_size;
1705 
1706 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1707 
1708 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1709 
1710 	/* take ownership of the relevant compute queues */
1711 	amdgpu_gfx_compute_queue_acquire(adev);
1712 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1713 
1714 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1715 				      AMDGPU_GEM_DOMAIN_VRAM,
1716 				      &adev->gfx.mec.hpd_eop_obj,
1717 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1718 				      (void **)&hpd);
1719 	if (r) {
1720 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1721 		gfx_v9_0_mec_fini(adev);
1722 		return r;
1723 	}
1724 
1725 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1726 
1727 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1728 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1729 
1730 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1731 
1732 	fw_data = (const __le32 *)
1733 		(adev->gfx.mec_fw->data +
1734 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1735 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1736 
1737 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1738 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1739 				      &adev->gfx.mec.mec_fw_obj,
1740 				      &adev->gfx.mec.mec_fw_gpu_addr,
1741 				      (void **)&fw);
1742 	if (r) {
1743 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1744 		gfx_v9_0_mec_fini(adev);
1745 		return r;
1746 	}
1747 
1748 	memcpy(fw, fw_data, fw_size);
1749 
1750 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1751 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1752 
1753 	return 0;
1754 }
1755 
1756 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1757 {
1758 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1759 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1760 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1761 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1762 		(SQ_IND_INDEX__FORCE_READ_MASK));
1763 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1764 }
1765 
1766 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1767 			   uint32_t wave, uint32_t thread,
1768 			   uint32_t regno, uint32_t num, uint32_t *out)
1769 {
1770 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1771 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1772 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1773 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1774 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1775 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1776 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1777 	while (num--)
1778 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1779 }
1780 
1781 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1782 {
1783 	/* type 1 wave data */
1784 	dst[(*no_fields)++] = 1;
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1790 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1799 }
1800 
1801 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1802 				     uint32_t wave, uint32_t start,
1803 				     uint32_t size, uint32_t *dst)
1804 {
1805 	wave_read_regs(
1806 		adev, simd, wave, 0,
1807 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1808 }
1809 
1810 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1811 				     uint32_t wave, uint32_t thread,
1812 				     uint32_t start, uint32_t size,
1813 				     uint32_t *dst)
1814 {
1815 	wave_read_regs(
1816 		adev, simd, wave, thread,
1817 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1818 }
1819 
1820 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1821 				  u32 me, u32 pipe, u32 q, u32 vm)
1822 {
1823 	soc15_grbm_select(adev, me, pipe, q, vm);
1824 }
1825 
1826 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1827 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1828 	.select_se_sh = &gfx_v9_0_select_se_sh,
1829 	.read_wave_data = &gfx_v9_0_read_wave_data,
1830 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1831 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1832 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1833 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1834 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1835 };
1836 
1837 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1838 {
1839 	u32 gb_addr_config;
1840 	int err;
1841 
1842 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1843 
1844 	switch (adev->asic_type) {
1845 	case CHIP_VEGA10:
1846 		adev->gfx.config.max_hw_contexts = 8;
1847 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1852 		break;
1853 	case CHIP_VEGA12:
1854 		adev->gfx.config.max_hw_contexts = 8;
1855 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1856 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1857 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1858 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1859 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1860 		DRM_INFO("fix gfx.config for vega12\n");
1861 		break;
1862 	case CHIP_VEGA20:
1863 		adev->gfx.config.max_hw_contexts = 8;
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1868 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1869 		gb_addr_config &= ~0xf3e777ff;
1870 		gb_addr_config |= 0x22014042;
1871 		/* check vbios table if gpu info is not available */
1872 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1873 		if (err)
1874 			return err;
1875 		break;
1876 	case CHIP_RAVEN:
1877 		adev->gfx.config.max_hw_contexts = 8;
1878 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1879 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1880 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1881 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1882 		if (adev->rev_id >= 8)
1883 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1884 		else
1885 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1886 		break;
1887 	case CHIP_ARCTURUS:
1888 		adev->gfx.config.max_hw_contexts = 8;
1889 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1894 		gb_addr_config &= ~0xf3e777ff;
1895 		gb_addr_config |= 0x22014042;
1896 		break;
1897 	case CHIP_RENOIR:
1898 		adev->gfx.config.max_hw_contexts = 8;
1899 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1902 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1903 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1904 		gb_addr_config &= ~0xf3e777ff;
1905 		gb_addr_config |= 0x22010042;
1906 		break;
1907 	default:
1908 		BUG();
1909 		break;
1910 	}
1911 
1912 	adev->gfx.config.gb_addr_config = gb_addr_config;
1913 
1914 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1915 			REG_GET_FIELD(
1916 					adev->gfx.config.gb_addr_config,
1917 					GB_ADDR_CONFIG,
1918 					NUM_PIPES);
1919 
1920 	adev->gfx.config.max_tile_pipes =
1921 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1922 
1923 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1924 			REG_GET_FIELD(
1925 					adev->gfx.config.gb_addr_config,
1926 					GB_ADDR_CONFIG,
1927 					NUM_BANKS);
1928 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1929 			REG_GET_FIELD(
1930 					adev->gfx.config.gb_addr_config,
1931 					GB_ADDR_CONFIG,
1932 					MAX_COMPRESSED_FRAGS);
1933 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1934 			REG_GET_FIELD(
1935 					adev->gfx.config.gb_addr_config,
1936 					GB_ADDR_CONFIG,
1937 					NUM_RB_PER_SE);
1938 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1939 			REG_GET_FIELD(
1940 					adev->gfx.config.gb_addr_config,
1941 					GB_ADDR_CONFIG,
1942 					NUM_SHADER_ENGINES);
1943 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1944 			REG_GET_FIELD(
1945 					adev->gfx.config.gb_addr_config,
1946 					GB_ADDR_CONFIG,
1947 					PIPE_INTERLEAVE_SIZE));
1948 
1949 	return 0;
1950 }
1951 
1952 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1953 				      int mec, int pipe, int queue)
1954 {
1955 	int r;
1956 	unsigned irq_type;
1957 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1958 
1959 	ring = &adev->gfx.compute_ring[ring_id];
1960 
1961 	/* mec0 is me1 */
1962 	ring->me = mec + 1;
1963 	ring->pipe = pipe;
1964 	ring->queue = queue;
1965 
1966 	ring->ring_obj = NULL;
1967 	ring->use_doorbell = true;
1968 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1969 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1970 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1971 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1972 
1973 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1974 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1975 		+ ring->pipe;
1976 
1977 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1978 	r = amdgpu_ring_init(adev, ring, 1024,
1979 			     &adev->gfx.eop_irq, irq_type);
1980 	if (r)
1981 		return r;
1982 
1983 
1984 	return 0;
1985 }
1986 
1987 static int gfx_v9_0_sw_init(void *handle)
1988 {
1989 	int i, j, k, r, ring_id;
1990 	struct amdgpu_ring *ring;
1991 	struct amdgpu_kiq *kiq;
1992 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1993 
1994 	switch (adev->asic_type) {
1995 	case CHIP_VEGA10:
1996 	case CHIP_VEGA12:
1997 	case CHIP_VEGA20:
1998 	case CHIP_RAVEN:
1999 	case CHIP_ARCTURUS:
2000 	case CHIP_RENOIR:
2001 		adev->gfx.mec.num_mec = 2;
2002 		break;
2003 	default:
2004 		adev->gfx.mec.num_mec = 1;
2005 		break;
2006 	}
2007 
2008 	adev->gfx.mec.num_pipe_per_mec = 4;
2009 	adev->gfx.mec.num_queue_per_pipe = 8;
2010 
2011 	/* EOP Event */
2012 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2013 	if (r)
2014 		return r;
2015 
2016 	/* Privileged reg */
2017 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2018 			      &adev->gfx.priv_reg_irq);
2019 	if (r)
2020 		return r;
2021 
2022 	/* Privileged inst */
2023 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2024 			      &adev->gfx.priv_inst_irq);
2025 	if (r)
2026 		return r;
2027 
2028 	/* ECC error */
2029 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2030 			      &adev->gfx.cp_ecc_error_irq);
2031 	if (r)
2032 		return r;
2033 
2034 	/* FUE error */
2035 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2036 			      &adev->gfx.cp_ecc_error_irq);
2037 	if (r)
2038 		return r;
2039 
2040 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2041 
2042 	gfx_v9_0_scratch_init(adev);
2043 
2044 	r = gfx_v9_0_init_microcode(adev);
2045 	if (r) {
2046 		DRM_ERROR("Failed to load gfx firmware!\n");
2047 		return r;
2048 	}
2049 
2050 	r = adev->gfx.rlc.funcs->init(adev);
2051 	if (r) {
2052 		DRM_ERROR("Failed to init rlc BOs!\n");
2053 		return r;
2054 	}
2055 
2056 	r = gfx_v9_0_mec_init(adev);
2057 	if (r) {
2058 		DRM_ERROR("Failed to init MEC BOs!\n");
2059 		return r;
2060 	}
2061 
2062 	/* set up the gfx ring */
2063 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2064 		ring = &adev->gfx.gfx_ring[i];
2065 		ring->ring_obj = NULL;
2066 		if (!i)
2067 			sprintf(ring->name, "gfx");
2068 		else
2069 			sprintf(ring->name, "gfx_%d", i);
2070 		ring->use_doorbell = true;
2071 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2072 		r = amdgpu_ring_init(adev, ring, 1024,
2073 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2074 		if (r)
2075 			return r;
2076 	}
2077 
2078 	/* set up the compute queues - allocate horizontally across pipes */
2079 	ring_id = 0;
2080 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2081 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2082 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2083 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2084 					continue;
2085 
2086 				r = gfx_v9_0_compute_ring_init(adev,
2087 							       ring_id,
2088 							       i, k, j);
2089 				if (r)
2090 					return r;
2091 
2092 				ring_id++;
2093 			}
2094 		}
2095 	}
2096 
2097 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2098 	if (r) {
2099 		DRM_ERROR("Failed to init KIQ BOs!\n");
2100 		return r;
2101 	}
2102 
2103 	kiq = &adev->gfx.kiq;
2104 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2105 	if (r)
2106 		return r;
2107 
2108 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2109 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2110 	if (r)
2111 		return r;
2112 
2113 	adev->gfx.ce_ram_size = 0x8000;
2114 
2115 	r = gfx_v9_0_gpu_early_init(adev);
2116 	if (r)
2117 		return r;
2118 
2119 	return 0;
2120 }
2121 
2122 
2123 static int gfx_v9_0_sw_fini(void *handle)
2124 {
2125 	int i;
2126 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2127 
2128 	amdgpu_gfx_ras_fini(adev);
2129 
2130 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2131 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2132 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2133 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2134 
2135 	amdgpu_gfx_mqd_sw_fini(adev);
2136 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2137 	amdgpu_gfx_kiq_fini(adev);
2138 
2139 	gfx_v9_0_mec_fini(adev);
2140 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2141 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2142 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2143 				&adev->gfx.rlc.cp_table_gpu_addr,
2144 				(void **)&adev->gfx.rlc.cp_table_ptr);
2145 	}
2146 	gfx_v9_0_free_microcode(adev);
2147 
2148 	return 0;
2149 }
2150 
2151 
/* Placeholder: no tiling mode table is programmed yet. */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2156 
2157 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2158 {
2159 	u32 data;
2160 
2161 	if (instance == 0xffffffff)
2162 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2163 	else
2164 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2165 
2166 	if (se_num == 0xffffffff)
2167 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2168 	else
2169 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2170 
2171 	if (sh_num == 0xffffffff)
2172 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2173 	else
2174 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2175 
2176 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2177 }
2178 
2179 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2180 {
2181 	u32 data, mask;
2182 
2183 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2184 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2185 
2186 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2187 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2188 
2189 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2190 					 adev->gfx.config.max_sh_per_se);
2191 
2192 	return (~data) & mask;
2193 }
2194 
2195 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2196 {
2197 	int i, j;
2198 	u32 data;
2199 	u32 active_rbs = 0;
2200 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2201 					adev->gfx.config.max_sh_per_se;
2202 
2203 	mutex_lock(&adev->grbm_idx_mutex);
2204 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2205 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2206 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2207 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2208 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2209 					       rb_bitmap_width_per_sh);
2210 		}
2211 	}
2212 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2213 	mutex_unlock(&adev->grbm_idx_mutex);
2214 
2215 	adev->gfx.config.backend_enable_mask = active_rbs;
2216 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2217 }
2218 
2219 #define DEFAULT_SH_MEM_BASES	(0x6000)
2220 #define FIRST_COMPUTE_VMID	(8)
2221 #define LAST_COMPUTE_VMID	(16)
2222 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2223 {
2224 	int i;
2225 	uint32_t sh_mem_config;
2226 	uint32_t sh_mem_bases;
2227 
2228 	/*
2229 	 * Configure apertures:
2230 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2231 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2232 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2233 	 */
2234 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2235 
2236 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2237 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2238 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2239 
2240 	mutex_lock(&adev->srbm_mutex);
2241 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2242 		soc15_grbm_select(adev, 0, 0, 0, i);
2243 		/* CP and shaders */
2244 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2245 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2246 	}
2247 	soc15_grbm_select(adev, 0, 0, 0, 0);
2248 	mutex_unlock(&adev->srbm_mutex);
2249 
2250 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2251 	   acccess. These should be enabled by FW for target VMIDs. */
2252 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2253 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2254 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2255 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2256 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2257 	}
2258 }
2259 
2260 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2261 {
2262 	int vmid;
2263 
2264 	/*
2265 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2266 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2267 	 * the driver can enable them for graphics. VMID0 should maintain
2268 	 * access so that HWS firmware can save/restore entries.
2269 	 */
2270 	for (vmid = 1; vmid < 16; vmid++) {
2271 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2272 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2273 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2274 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2275 	}
2276 }
2277 
2278 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2279 {
2280 	u32 tmp;
2281 	int i;
2282 
2283 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2284 
2285 	gfx_v9_0_tiling_mode_table_init(adev);
2286 
2287 	gfx_v9_0_setup_rb(adev);
2288 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2289 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2290 
2291 	/* XXX SH_MEM regs */
2292 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2293 	mutex_lock(&adev->srbm_mutex);
2294 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2295 		soc15_grbm_select(adev, 0, 0, 0, i);
2296 		/* CP and shaders */
2297 		if (i == 0) {
2298 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2299 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2300 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2301 					    !!amdgpu_noretry);
2302 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2303 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2304 		} else {
2305 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2306 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2307 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2308 					    !!amdgpu_noretry);
2309 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2310 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2311 				(adev->gmc.private_aperture_start >> 48));
2312 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2313 				(adev->gmc.shared_aperture_start >> 48));
2314 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2315 		}
2316 	}
2317 	soc15_grbm_select(adev, 0, 0, 0, 0);
2318 
2319 	mutex_unlock(&adev->srbm_mutex);
2320 
2321 	gfx_v9_0_init_compute_vmid(adev);
2322 	gfx_v9_0_init_gds_vmid(adev);
2323 }
2324 
2325 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2326 {
2327 	u32 i, j, k;
2328 	u32 mask;
2329 
2330 	mutex_lock(&adev->grbm_idx_mutex);
2331 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2332 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2333 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2334 			for (k = 0; k < adev->usec_timeout; k++) {
2335 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2336 					break;
2337 				udelay(1);
2338 			}
2339 			if (k == adev->usec_timeout) {
2340 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2341 						      0xffffffff, 0xffffffff);
2342 				mutex_unlock(&adev->grbm_idx_mutex);
2343 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2344 					 i, j);
2345 				return;
2346 			}
2347 		}
2348 	}
2349 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2350 	mutex_unlock(&adev->grbm_idx_mutex);
2351 
2352 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2353 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2354 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2355 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2356 	for (k = 0; k < adev->usec_timeout; k++) {
2357 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2358 			break;
2359 		udelay(1);
2360 	}
2361 }
2362 
2363 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2364 					       bool enable)
2365 {
2366 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2367 
2368 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2369 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2370 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2371 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2372 
2373 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2374 }
2375 
2376 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2377 {
2378 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2379 	/* csib */
2380 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2381 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2382 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2383 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2384 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2385 			adev->gfx.rlc.clear_state_size);
2386 }
2387 
/*
 * Walk the indirect part of the RLC register list format.
 *
 * Starting at @indirect_offset, the list is treated as a sequence of groups.
 * Each group is a run of three-word entries terminated by a 0xFFFFFFFF word;
 * the third word of every entry is looked up in @unique_indirect_regs and
 * stored in the first free (zero) slot if it is not present yet.
 *
 * @register_list_format:        list being parsed
 * @indirect_offset:             index of the first indirect word
 * @list_size:                   number of words in @register_list_format
 * @unique_indirect_regs:        table of unique third-word values (0 = free)
 * @unique_indirect_reg_count:   number of slots in @unique_indirect_regs
 * @indirect_start_offsets:      receives the start index of every group
 * @indirect_start_offsets_count: in/out number of valid start offsets
 * @max_start_offsets_count:     capacity of @indirect_start_offsets
 *
 * BUG()s if more unique values are found than @unique_indirect_regs can hold.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/*
		 * Record where this group starts.  If the caller's array is
		 * already full, warn and drop the offset instead of writing
		 * past the end of the array; parsing continues so that the
		 * unique register table is still filled in completely.
		 */
		if (!WARN_ON(*indirect_start_offsets_count >=
			     max_start_offsets_count)) {
			indirect_start_offsets[*indirect_start_offsets_count] =
				indirect_offset;
			*indirect_start_offsets_count =
				*indirect_start_offsets_count + 1;
		}

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words of the entry */
			indirect_offset += 2;

			/* look for the matching index or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2424 
2425 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2426 {
2427 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2428 	int unique_indirect_reg_count = 0;
2429 
2430 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2431 	int indirect_start_offsets_count = 0;
2432 
2433 	int list_size = 0;
2434 	int i = 0, j = 0;
2435 	u32 tmp = 0;
2436 
2437 	u32 *register_list_format =
2438 		kmemdup(adev->gfx.rlc.register_list_format,
2439 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2440 	if (!register_list_format)
2441 		return -ENOMEM;
2442 
2443 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2444 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2445 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2446 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2447 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2448 				    unique_indirect_regs,
2449 				    unique_indirect_reg_count,
2450 				    indirect_start_offsets,
2451 				    &indirect_start_offsets_count,
2452 				    ARRAY_SIZE(indirect_start_offsets));
2453 
2454 	/* enable auto inc in case it is disabled */
2455 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2456 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2457 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2458 
2459 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2460 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2461 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2462 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2463 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2464 			adev->gfx.rlc.register_restore[i]);
2465 
2466 	/* load indirect register */
2467 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2468 		adev->gfx.rlc.reg_list_format_start);
2469 
2470 	/* direct register portion */
2471 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2472 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2473 			register_list_format[i]);
2474 
2475 	/* indirect register portion */
2476 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2477 		if (register_list_format[i] == 0xFFFFFFFF) {
2478 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2479 			continue;
2480 		}
2481 
2482 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2483 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2484 
2485 		for (j = 0; j < unique_indirect_reg_count; j++) {
2486 			if (register_list_format[i] == unique_indirect_regs[j]) {
2487 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2488 				break;
2489 			}
2490 		}
2491 
2492 		BUG_ON(j >= unique_indirect_reg_count);
2493 
2494 		i++;
2495 	}
2496 
2497 	/* set save/restore list size */
2498 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2499 	list_size = list_size >> 1;
2500 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2501 		adev->gfx.rlc.reg_restore_list_size);
2502 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2503 
2504 	/* write the starting offsets to RLC scratch ram */
2505 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2506 		adev->gfx.rlc.starting_offsets_start);
2507 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2508 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2509 		       indirect_start_offsets[i]);
2510 
2511 	/* load unique indirect regs*/
2512 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2513 		if (unique_indirect_regs[i] != 0) {
2514 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2515 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2516 			       unique_indirect_regs[i] & 0x3FFFF);
2517 
2518 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2519 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2520 			       unique_indirect_regs[i] >> 20);
2521 		}
2522 	}
2523 
2524 	kfree(register_list_format);
2525 	return 0;
2526 }
2527 
2528 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2529 {
2530 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2531 }
2532 
2533 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2534 					     bool enable)
2535 {
2536 	uint32_t data = 0;
2537 	uint32_t default_data = 0;
2538 
2539 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2540 	if (enable == true) {
2541 		/* enable GFXIP control over CGPG */
2542 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2543 		if(default_data != data)
2544 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2545 
2546 		/* update status */
2547 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2548 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2549 		if(default_data != data)
2550 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2551 	} else {
2552 		/* restore GFXIP control over GCPG */
2553 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2554 		if(default_data != data)
2555 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2556 	}
2557 }
2558 
2559 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2560 {
2561 	uint32_t data = 0;
2562 
2563 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2564 			      AMD_PG_SUPPORT_GFX_SMG |
2565 			      AMD_PG_SUPPORT_GFX_DMG)) {
2566 		/* init IDLE_POLL_COUNT = 60 */
2567 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2568 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2569 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2570 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2571 
2572 		/* init RLC PG Delay */
2573 		data = 0;
2574 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2575 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2576 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2577 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2578 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2579 
2580 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2581 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2582 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2583 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2584 
2585 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2586 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2587 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2588 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2589 
2590 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2591 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2592 
2593 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2594 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2595 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2596 
2597 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2598 	}
2599 }
2600 
2601 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2602 						bool enable)
2603 {
2604 	uint32_t data = 0;
2605 	uint32_t default_data = 0;
2606 
2607 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2608 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2609 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2610 			     enable ? 1 : 0);
2611 	if (default_data != data)
2612 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2613 }
2614 
2615 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2616 						bool enable)
2617 {
2618 	uint32_t data = 0;
2619 	uint32_t default_data = 0;
2620 
2621 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2622 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2623 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2624 			     enable ? 1 : 0);
2625 	if(default_data != data)
2626 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2627 }
2628 
2629 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2630 					bool enable)
2631 {
2632 	uint32_t data = 0;
2633 	uint32_t default_data = 0;
2634 
2635 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2636 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2637 			     CP_PG_DISABLE,
2638 			     enable ? 0 : 1);
2639 	if(default_data != data)
2640 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2641 }
2642 
2643 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2644 						bool enable)
2645 {
2646 	uint32_t data, default_data;
2647 
2648 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2649 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2650 			     GFX_POWER_GATING_ENABLE,
2651 			     enable ? 1 : 0);
2652 	if(default_data != data)
2653 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2654 }
2655 
2656 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2657 						bool enable)
2658 {
2659 	uint32_t data, default_data;
2660 
2661 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2662 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2663 			     GFX_PIPELINE_PG_ENABLE,
2664 			     enable ? 1 : 0);
2665 	if(default_data != data)
2666 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2667 
2668 	if (!enable)
2669 		/* read any GFX register to wake up GFX */
2670 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2671 }
2672 
2673 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2674 						       bool enable)
2675 {
2676 	uint32_t data, default_data;
2677 
2678 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2679 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2680 			     STATIC_PER_CU_PG_ENABLE,
2681 			     enable ? 1 : 0);
2682 	if(default_data != data)
2683 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2684 }
2685 
2686 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2687 						bool enable)
2688 {
2689 	uint32_t data, default_data;
2690 
2691 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2692 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2693 			     DYN_PER_CU_PG_ENABLE,
2694 			     enable ? 1 : 0);
2695 	if(default_data != data)
2696 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2697 }
2698 
2699 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2700 {
2701 	gfx_v9_0_init_csb(adev);
2702 
2703 	/*
2704 	 * Rlc save restore list is workable since v2_1.
2705 	 * And it's needed by gfxoff feature.
2706 	 */
2707 	if (adev->gfx.rlc.is_rlc_v2_1) {
2708 		if (adev->asic_type == CHIP_VEGA12 ||
2709 		    (adev->asic_type == CHIP_RAVEN &&
2710 		     adev->rev_id >= 8))
2711 			gfx_v9_1_init_rlc_save_restore_list(adev);
2712 		gfx_v9_0_enable_save_restore_machine(adev);
2713 	}
2714 
2715 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2716 			      AMD_PG_SUPPORT_GFX_SMG |
2717 			      AMD_PG_SUPPORT_GFX_DMG |
2718 			      AMD_PG_SUPPORT_CP |
2719 			      AMD_PG_SUPPORT_GDS |
2720 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2721 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2722 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2723 		gfx_v9_0_init_gfx_power_gating(adev);
2724 	}
2725 }
2726 
2727 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2728 {
2729 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2730 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2731 	gfx_v9_0_wait_for_rlc_serdes(adev);
2732 }
2733 
2734 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2735 {
2736 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2737 	udelay(50);
2738 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2739 	udelay(50);
2740 }
2741 
2742 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2743 {
2744 #ifdef AMDGPU_RLC_DEBUG_RETRY
2745 	u32 rlc_ucode_ver;
2746 #endif
2747 
2748 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2749 	udelay(50);
2750 
2751 	/* carrizo do enable cp interrupt after cp inited */
2752 	if (!(adev->flags & AMD_IS_APU)) {
2753 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2754 		udelay(50);
2755 	}
2756 
2757 #ifdef AMDGPU_RLC_DEBUG_RETRY
2758 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2759 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2760 	if(rlc_ucode_ver == 0x108) {
2761 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2762 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2763 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2764 		 * default is 0x9C4 to create a 100us interval */
2765 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2766 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2767 		 * to disable the page fault retry interrupts, default is
2768 		 * 0x100 (256) */
2769 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2770 	}
2771 #endif
2772 }
2773 
2774 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2775 {
2776 	const struct rlc_firmware_header_v2_0 *hdr;
2777 	const __le32 *fw_data;
2778 	unsigned i, fw_size;
2779 
2780 	if (!adev->gfx.rlc_fw)
2781 		return -EINVAL;
2782 
2783 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2784 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2785 
2786 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2787 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2788 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2789 
2790 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2791 			RLCG_UCODE_LOADING_START_ADDRESS);
2792 	for (i = 0; i < fw_size; i++)
2793 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2794 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2795 
2796 	return 0;
2797 }
2798 
2799 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2800 {
2801 	int r;
2802 
2803 	if (amdgpu_sriov_vf(adev)) {
2804 		gfx_v9_0_init_csb(adev);
2805 		return 0;
2806 	}
2807 
2808 	adev->gfx.rlc.funcs->stop(adev);
2809 
2810 	/* disable CG */
2811 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2812 
2813 	gfx_v9_0_init_pg(adev);
2814 
2815 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2816 		/* legacy rlc firmware loading */
2817 		r = gfx_v9_0_rlc_load_microcode(adev);
2818 		if (r)
2819 			return r;
2820 	}
2821 
2822 	switch (adev->asic_type) {
2823 	case CHIP_RAVEN:
2824 		if (amdgpu_lbpw == 0)
2825 			gfx_v9_0_enable_lbpw(adev, false);
2826 		else
2827 			gfx_v9_0_enable_lbpw(adev, true);
2828 		break;
2829 	case CHIP_VEGA20:
2830 		if (amdgpu_lbpw > 0)
2831 			gfx_v9_0_enable_lbpw(adev, true);
2832 		else
2833 			gfx_v9_0_enable_lbpw(adev, false);
2834 		break;
2835 	default:
2836 		break;
2837 	}
2838 
2839 	adev->gfx.rlc.funcs->start(adev);
2840 
2841 	return 0;
2842 }
2843 
2844 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2845 {
2846 	int i;
2847 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2848 
2849 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2850 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2851 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2852 	if (!enable) {
2853 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2854 			adev->gfx.gfx_ring[i].sched.ready = false;
2855 	}
2856 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2857 	udelay(50);
2858 }
2859 
2860 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2861 {
2862 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2863 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2864 	const struct gfx_firmware_header_v1_0 *me_hdr;
2865 	const __le32 *fw_data;
2866 	unsigned i, fw_size;
2867 
2868 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2869 		return -EINVAL;
2870 
2871 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2872 		adev->gfx.pfp_fw->data;
2873 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2874 		adev->gfx.ce_fw->data;
2875 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2876 		adev->gfx.me_fw->data;
2877 
2878 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2879 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2880 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2881 
2882 	gfx_v9_0_cp_gfx_enable(adev, false);
2883 
2884 	/* PFP */
2885 	fw_data = (const __le32 *)
2886 		(adev->gfx.pfp_fw->data +
2887 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2888 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2889 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2890 	for (i = 0; i < fw_size; i++)
2891 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2892 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2893 
2894 	/* CE */
2895 	fw_data = (const __le32 *)
2896 		(adev->gfx.ce_fw->data +
2897 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2898 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2899 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2900 	for (i = 0; i < fw_size; i++)
2901 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2902 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2903 
2904 	/* ME */
2905 	fw_data = (const __le32 *)
2906 		(adev->gfx.me_fw->data +
2907 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2908 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2909 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2910 	for (i = 0; i < fw_size; i++)
2911 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2912 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2913 
2914 	return 0;
2915 }
2916 
2917 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2918 {
2919 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2920 	const struct cs_section_def *sect = NULL;
2921 	const struct cs_extent_def *ext = NULL;
2922 	int r, i, tmp;
2923 
2924 	/* init the CP */
2925 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2926 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2927 
2928 	gfx_v9_0_cp_gfx_enable(adev, true);
2929 
2930 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2931 	if (r) {
2932 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2933 		return r;
2934 	}
2935 
2936 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2937 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2938 
2939 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2940 	amdgpu_ring_write(ring, 0x80000000);
2941 	amdgpu_ring_write(ring, 0x80000000);
2942 
2943 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2944 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2945 			if (sect->id == SECT_CONTEXT) {
2946 				amdgpu_ring_write(ring,
2947 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2948 					       ext->reg_count));
2949 				amdgpu_ring_write(ring,
2950 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2951 				for (i = 0; i < ext->reg_count; i++)
2952 					amdgpu_ring_write(ring, ext->extent[i]);
2953 			}
2954 		}
2955 	}
2956 
2957 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2958 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2959 
2960 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2961 	amdgpu_ring_write(ring, 0);
2962 
2963 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2964 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2965 	amdgpu_ring_write(ring, 0x8000);
2966 	amdgpu_ring_write(ring, 0x8000);
2967 
2968 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2969 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2970 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2971 	amdgpu_ring_write(ring, tmp);
2972 	amdgpu_ring_write(ring, 0);
2973 
2974 	amdgpu_ring_commit(ring);
2975 
2976 	return 0;
2977 }
2978 
2979 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2980 {
2981 	struct amdgpu_ring *ring;
2982 	u32 tmp;
2983 	u32 rb_bufsz;
2984 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2985 
2986 	/* Set the write pointer delay */
2987 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2988 
2989 	/* set the RB to use vmid 0 */
2990 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2991 
2992 	/* Set ring buffer size */
2993 	ring = &adev->gfx.gfx_ring[0];
2994 	rb_bufsz = order_base_2(ring->ring_size / 8);
2995 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2996 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2997 #ifdef __BIG_ENDIAN
2998 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2999 #endif
3000 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3001 
3002 	/* Initialize the ring buffer's write pointers */
3003 	ring->wptr = 0;
3004 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3005 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3006 
3007 	/* set the wb address wether it's enabled or not */
3008 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3009 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3010 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3011 
3012 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3013 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3014 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3015 
3016 	mdelay(1);
3017 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3018 
3019 	rb_addr = ring->gpu_addr >> 8;
3020 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3021 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3022 
3023 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3024 	if (ring->use_doorbell) {
3025 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3026 				    DOORBELL_OFFSET, ring->doorbell_index);
3027 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3028 				    DOORBELL_EN, 1);
3029 	} else {
3030 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3031 	}
3032 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3033 
3034 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3035 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3036 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3037 
3038 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3039 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3040 
3041 
3042 	/* start the ring */
3043 	gfx_v9_0_cp_gfx_start(adev);
3044 	ring->sched.ready = true;
3045 
3046 	return 0;
3047 }
3048 
3049 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3050 {
3051 	int i;
3052 
3053 	if (enable) {
3054 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3055 	} else {
3056 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3057 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3058 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3059 			adev->gfx.compute_ring[i].sched.ready = false;
3060 		adev->gfx.kiq.ring.sched.ready = false;
3061 	}
3062 	udelay(50);
3063 }
3064 
3065 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3066 {
3067 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3068 	const __le32 *fw_data;
3069 	unsigned i;
3070 	u32 tmp;
3071 
3072 	if (!adev->gfx.mec_fw)
3073 		return -EINVAL;
3074 
3075 	gfx_v9_0_cp_compute_enable(adev, false);
3076 
3077 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3078 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3079 
3080 	fw_data = (const __le32 *)
3081 		(adev->gfx.mec_fw->data +
3082 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3083 	tmp = 0;
3084 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3085 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3086 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3087 
3088 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3089 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3090 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3091 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3092 
3093 	/* MEC1 */
3094 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3095 			 mec_hdr->jt_offset);
3096 	for (i = 0; i < mec_hdr->jt_size; i++)
3097 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3098 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3099 
3100 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3101 			adev->gfx.mec_fw_version);
3102 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3103 
3104 	return 0;
3105 }
3106 
3107 /* KIQ functions */
3108 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3109 {
3110 	uint32_t tmp;
3111 	struct amdgpu_device *adev = ring->adev;
3112 
3113 	/* tell RLC which is KIQ queue */
3114 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3115 	tmp &= 0xffffff00;
3116 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3117 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3118 	tmp |= 0x80;
3119 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3120 }
3121 
3122 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3123 {
3124 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3125 	uint64_t queue_mask = 0;
3126 	int r, i;
3127 
3128 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3129 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3130 			continue;
3131 
3132 		/* This situation may be hit in the future if a new HW
3133 		 * generation exposes more than 64 queues. If so, the
3134 		 * definition of queue_mask needs updating */
3135 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3136 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3137 			break;
3138 		}
3139 
3140 		queue_mask |= (1ull << i);
3141 	}
3142 
3143 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3144 	if (r) {
3145 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3146 		return r;
3147 	}
3148 
3149 	/* set resources */
3150 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3151 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3152 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3153 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3154 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3155 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3156 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3157 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3158 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3159 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3160 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3161 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3162 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3163 
3164 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3165 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3166 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3167 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3168 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3169 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3170 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3171 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3172 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3173 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3174 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3175 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3176 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3177 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3178 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3179 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3180 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3181 	}
3182 
3183 	r = amdgpu_ring_test_helper(kiq_ring);
3184 	if (r)
3185 		DRM_ERROR("KCQ enable failed\n");
3186 
3187 	return r;
3188 }
3189 
3190 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3191 {
3192 	struct amdgpu_device *adev = ring->adev;
3193 	struct v9_mqd *mqd = ring->mqd_ptr;
3194 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3195 	uint32_t tmp;
3196 
3197 	mqd->header = 0xC0310800;
3198 	mqd->compute_pipelinestat_enable = 0x00000001;
3199 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3200 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3201 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3202 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3203 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3204 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3205 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3206 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3207 	mqd->compute_misc_reserved = 0x00000003;
3208 
3209 	mqd->dynamic_cu_mask_addr_lo =
3210 		lower_32_bits(ring->mqd_gpu_addr
3211 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3212 	mqd->dynamic_cu_mask_addr_hi =
3213 		upper_32_bits(ring->mqd_gpu_addr
3214 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3215 
3216 	eop_base_addr = ring->eop_gpu_addr >> 8;
3217 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3218 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3219 
3220 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3221 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3222 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3223 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3224 
3225 	mqd->cp_hqd_eop_control = tmp;
3226 
3227 	/* enable doorbell? */
3228 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3229 
3230 	if (ring->use_doorbell) {
3231 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3232 				    DOORBELL_OFFSET, ring->doorbell_index);
3233 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3234 				    DOORBELL_EN, 1);
3235 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3236 				    DOORBELL_SOURCE, 0);
3237 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3238 				    DOORBELL_HIT, 0);
3239 	} else {
3240 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3241 					 DOORBELL_EN, 0);
3242 	}
3243 
3244 	mqd->cp_hqd_pq_doorbell_control = tmp;
3245 
3246 	/* disable the queue if it's active */
3247 	ring->wptr = 0;
3248 	mqd->cp_hqd_dequeue_request = 0;
3249 	mqd->cp_hqd_pq_rptr = 0;
3250 	mqd->cp_hqd_pq_wptr_lo = 0;
3251 	mqd->cp_hqd_pq_wptr_hi = 0;
3252 
3253 	/* set the pointer to the MQD */
3254 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3255 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3256 
3257 	/* set MQD vmid to 0 */
3258 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3259 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3260 	mqd->cp_mqd_control = tmp;
3261 
3262 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3263 	hqd_gpu_addr = ring->gpu_addr >> 8;
3264 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3265 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3266 
3267 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3268 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3269 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3270 			    (order_base_2(ring->ring_size / 4) - 1));
3271 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3272 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3273 #ifdef __BIG_ENDIAN
3274 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3275 #endif
3276 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3277 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3278 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3279 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3280 	mqd->cp_hqd_pq_control = tmp;
3281 
3282 	/* set the wb address whether it's enabled or not */
3283 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3284 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3285 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3286 		upper_32_bits(wb_gpu_addr) & 0xffff;
3287 
3288 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3289 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3290 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3291 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3292 
3293 	tmp = 0;
3294 	/* enable the doorbell if requested */
3295 	if (ring->use_doorbell) {
3296 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3297 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3298 				DOORBELL_OFFSET, ring->doorbell_index);
3299 
3300 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3301 					 DOORBELL_EN, 1);
3302 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3303 					 DOORBELL_SOURCE, 0);
3304 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3305 					 DOORBELL_HIT, 0);
3306 	}
3307 
3308 	mqd->cp_hqd_pq_doorbell_control = tmp;
3309 
3310 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3311 	ring->wptr = 0;
3312 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3313 
3314 	/* set the vmid for the queue */
3315 	mqd->cp_hqd_vmid = 0;
3316 
3317 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3318 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3319 	mqd->cp_hqd_persistent_state = tmp;
3320 
3321 	/* set MIN_IB_AVAIL_SIZE */
3322 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3323 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3324 	mqd->cp_hqd_ib_control = tmp;
3325 
3326 	/* activate the queue */
3327 	mqd->cp_hqd_active = 1;
3328 
3329 	return 0;
3330 }
3331 
3332 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3333 {
3334 	struct amdgpu_device *adev = ring->adev;
3335 	struct v9_mqd *mqd = ring->mqd_ptr;
3336 	int j;
3337 
3338 	/* disable wptr polling */
3339 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3340 
3341 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3342 	       mqd->cp_hqd_eop_base_addr_lo);
3343 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3344 	       mqd->cp_hqd_eop_base_addr_hi);
3345 
3346 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3347 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3348 	       mqd->cp_hqd_eop_control);
3349 
3350 	/* enable doorbell? */
3351 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3352 	       mqd->cp_hqd_pq_doorbell_control);
3353 
3354 	/* disable the queue if it's active */
3355 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3356 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3357 		for (j = 0; j < adev->usec_timeout; j++) {
3358 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3359 				break;
3360 			udelay(1);
3361 		}
3362 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3363 		       mqd->cp_hqd_dequeue_request);
3364 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3365 		       mqd->cp_hqd_pq_rptr);
3366 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3367 		       mqd->cp_hqd_pq_wptr_lo);
3368 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3369 		       mqd->cp_hqd_pq_wptr_hi);
3370 	}
3371 
3372 	/* set the pointer to the MQD */
3373 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3374 	       mqd->cp_mqd_base_addr_lo);
3375 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3376 	       mqd->cp_mqd_base_addr_hi);
3377 
3378 	/* set MQD vmid to 0 */
3379 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3380 	       mqd->cp_mqd_control);
3381 
3382 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3383 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3384 	       mqd->cp_hqd_pq_base_lo);
3385 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3386 	       mqd->cp_hqd_pq_base_hi);
3387 
3388 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3389 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3390 	       mqd->cp_hqd_pq_control);
3391 
3392 	/* set the wb address whether it's enabled or not */
3393 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3394 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3395 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3396 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3397 
3398 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3399 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3400 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3401 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3402 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3403 
3404 	/* enable the doorbell if requested */
3405 	if (ring->use_doorbell) {
3406 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3407 					(adev->doorbell_index.kiq * 2) << 2);
3408 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3409 					(adev->doorbell_index.userqueue_end * 2) << 2);
3410 	}
3411 
3412 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3413 	       mqd->cp_hqd_pq_doorbell_control);
3414 
3415 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3416 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3417 	       mqd->cp_hqd_pq_wptr_lo);
3418 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3419 	       mqd->cp_hqd_pq_wptr_hi);
3420 
3421 	/* set the vmid for the queue */
3422 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3423 
3424 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3425 	       mqd->cp_hqd_persistent_state);
3426 
3427 	/* activate the queue */
3428 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3429 	       mqd->cp_hqd_active);
3430 
3431 	if (ring->use_doorbell)
3432 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3433 
3434 	return 0;
3435 }
3436 
3437 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3438 {
3439 	struct amdgpu_device *adev = ring->adev;
3440 	int j;
3441 
3442 	/* disable the queue if it's active */
3443 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3444 
3445 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3446 
3447 		for (j = 0; j < adev->usec_timeout; j++) {
3448 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3449 				break;
3450 			udelay(1);
3451 		}
3452 
3453 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3454 			DRM_DEBUG("KIQ dequeue request failed.\n");
3455 
3456 			/* Manual disable if dequeue request times out */
3457 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3458 		}
3459 
3460 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3461 		      0);
3462 	}
3463 
3464 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3465 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3466 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3468 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3469 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3470 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3471 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3472 
3473 	return 0;
3474 }
3475 
3476 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3477 {
3478 	struct amdgpu_device *adev = ring->adev;
3479 	struct v9_mqd *mqd = ring->mqd_ptr;
3480 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3481 
3482 	gfx_v9_0_kiq_setting(ring);
3483 
3484 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3485 		/* reset MQD to a clean status */
3486 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3487 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3488 
3489 		/* reset ring buffer */
3490 		ring->wptr = 0;
3491 		amdgpu_ring_clear_ring(ring);
3492 
3493 		mutex_lock(&adev->srbm_mutex);
3494 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3495 		gfx_v9_0_kiq_init_register(ring);
3496 		soc15_grbm_select(adev, 0, 0, 0, 0);
3497 		mutex_unlock(&adev->srbm_mutex);
3498 	} else {
3499 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3500 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3501 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3502 		mutex_lock(&adev->srbm_mutex);
3503 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3504 		gfx_v9_0_mqd_init(ring);
3505 		gfx_v9_0_kiq_init_register(ring);
3506 		soc15_grbm_select(adev, 0, 0, 0, 0);
3507 		mutex_unlock(&adev->srbm_mutex);
3508 
3509 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3510 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3511 	}
3512 
3513 	return 0;
3514 }
3515 
3516 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3517 {
3518 	struct amdgpu_device *adev = ring->adev;
3519 	struct v9_mqd *mqd = ring->mqd_ptr;
3520 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3521 
3522 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3523 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3524 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3525 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3526 		mutex_lock(&adev->srbm_mutex);
3527 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3528 		gfx_v9_0_mqd_init(ring);
3529 		soc15_grbm_select(adev, 0, 0, 0, 0);
3530 		mutex_unlock(&adev->srbm_mutex);
3531 
3532 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3533 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3534 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3535 		/* reset MQD to a clean status */
3536 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3537 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3538 
3539 		/* reset ring buffer */
3540 		ring->wptr = 0;
3541 		amdgpu_ring_clear_ring(ring);
3542 	} else {
3543 		amdgpu_ring_clear_ring(ring);
3544 	}
3545 
3546 	return 0;
3547 }
3548 
3549 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3550 {
3551 	struct amdgpu_ring *ring;
3552 	int r;
3553 
3554 	ring = &adev->gfx.kiq.ring;
3555 
3556 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3557 	if (unlikely(r != 0))
3558 		return r;
3559 
3560 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3561 	if (unlikely(r != 0))
3562 		return r;
3563 
3564 	gfx_v9_0_kiq_init_queue(ring);
3565 	amdgpu_bo_kunmap(ring->mqd_obj);
3566 	ring->mqd_ptr = NULL;
3567 	amdgpu_bo_unreserve(ring->mqd_obj);
3568 	ring->sched.ready = true;
3569 	return 0;
3570 }
3571 
3572 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3573 {
3574 	struct amdgpu_ring *ring = NULL;
3575 	int r = 0, i;
3576 
3577 	gfx_v9_0_cp_compute_enable(adev, true);
3578 
3579 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3580 		ring = &adev->gfx.compute_ring[i];
3581 
3582 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3583 		if (unlikely(r != 0))
3584 			goto done;
3585 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3586 		if (!r) {
3587 			r = gfx_v9_0_kcq_init_queue(ring);
3588 			amdgpu_bo_kunmap(ring->mqd_obj);
3589 			ring->mqd_ptr = NULL;
3590 		}
3591 		amdgpu_bo_unreserve(ring->mqd_obj);
3592 		if (r)
3593 			goto done;
3594 	}
3595 
3596 	r = gfx_v9_0_kiq_kcq_enable(adev);
3597 done:
3598 	return r;
3599 }
3600 
3601 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3602 {
3603 	int r, i;
3604 	struct amdgpu_ring *ring;
3605 
3606 	if (!(adev->flags & AMD_IS_APU))
3607 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3608 
3609 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3610 		if (adev->asic_type != CHIP_ARCTURUS) {
3611 			/* legacy firmware loading */
3612 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3613 			if (r)
3614 				return r;
3615 		}
3616 
3617 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3618 		if (r)
3619 			return r;
3620 	}
3621 
3622 	r = gfx_v9_0_kiq_resume(adev);
3623 	if (r)
3624 		return r;
3625 
3626 	if (adev->asic_type != CHIP_ARCTURUS) {
3627 		r = gfx_v9_0_cp_gfx_resume(adev);
3628 		if (r)
3629 			return r;
3630 	}
3631 
3632 	r = gfx_v9_0_kcq_resume(adev);
3633 	if (r)
3634 		return r;
3635 
3636 	if (adev->asic_type != CHIP_ARCTURUS) {
3637 		ring = &adev->gfx.gfx_ring[0];
3638 		r = amdgpu_ring_test_helper(ring);
3639 		if (r)
3640 			return r;
3641 	}
3642 
3643 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3644 		ring = &adev->gfx.compute_ring[i];
3645 		amdgpu_ring_test_helper(ring);
3646 	}
3647 
3648 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3649 
3650 	return 0;
3651 }
3652 
3653 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3654 {
3655 	if (adev->asic_type != CHIP_ARCTURUS)
3656 		gfx_v9_0_cp_gfx_enable(adev, enable);
3657 	gfx_v9_0_cp_compute_enable(adev, enable);
3658 }
3659 
3660 static int gfx_v9_0_hw_init(void *handle)
3661 {
3662 	int r;
3663 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3664 
3665 	if (!amdgpu_sriov_vf(adev))
3666 		gfx_v9_0_init_golden_registers(adev);
3667 
3668 	gfx_v9_0_constants_init(adev);
3669 
3670 	r = adev->gfx.rlc.funcs->resume(adev);
3671 	if (r)
3672 		return r;
3673 
3674 	r = gfx_v9_0_cp_resume(adev);
3675 	if (r)
3676 		return r;
3677 
3678 	return r;
3679 }
3680 
3681 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3682 {
3683 	int r, i;
3684 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3685 
3686 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3687 	if (r)
3688 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3689 
3690 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3691 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3692 
3693 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3694 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3695 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3696 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3697 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3698 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3699 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3700 		amdgpu_ring_write(kiq_ring, 0);
3701 		amdgpu_ring_write(kiq_ring, 0);
3702 		amdgpu_ring_write(kiq_ring, 0);
3703 	}
3704 	r = amdgpu_ring_test_helper(kiq_ring);
3705 	if (r)
3706 		DRM_ERROR("KCQ disable failed\n");
3707 
3708 	return r;
3709 }
3710 
3711 static int gfx_v9_0_hw_fini(void *handle)
3712 {
3713 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3714 
3715 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3716 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3717 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3718 
3719 	/* DF freeze and kcq disable will fail */
3720 	if (!amdgpu_ras_intr_triggered())
3721 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3722 		gfx_v9_0_kcq_disable(adev);
3723 
3724 	if (amdgpu_sriov_vf(adev)) {
3725 		gfx_v9_0_cp_gfx_enable(adev, false);
3726 		/* must disable polling for SRIOV when hw finished, otherwise
3727 		 * CPC engine may still keep fetching WB address which is already
3728 		 * invalid after sw finished and trigger DMAR reading error in
3729 		 * hypervisor side.
3730 		 */
3731 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3732 		return 0;
3733 	}
3734 
3735 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3736 	 * otherwise KIQ is hanging when binding back
3737 	 */
3738 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3739 		mutex_lock(&adev->srbm_mutex);
3740 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3741 				adev->gfx.kiq.ring.pipe,
3742 				adev->gfx.kiq.ring.queue, 0);
3743 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3744 		soc15_grbm_select(adev, 0, 0, 0, 0);
3745 		mutex_unlock(&adev->srbm_mutex);
3746 	}
3747 
3748 	gfx_v9_0_cp_enable(adev, false);
3749 	adev->gfx.rlc.funcs->stop(adev);
3750 
3751 	return 0;
3752 }
3753 
/*
 * gfx_v9_0_suspend - suspend callback of the gfx v9 block
 *
 * @handle: amdgpu device, passed as an opaque pointer
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of gfx_v9_0_hw_fini().
 */
static int gfx_v9_0_suspend(void *handle)
{
	return gfx_v9_0_hw_fini(handle);
}
3758 
/*
 * gfx_v9_0_resume - resume callback of the gfx v9 block
 *
 * @handle: amdgpu device, passed as an opaque pointer
 *
 * Resume is implemented as a plain hardware init.
 *
 * Returns the result of gfx_v9_0_hw_init().
 */
static int gfx_v9_0_resume(void *handle)
{
	return gfx_v9_0_hw_init(handle);
}
3763 
3764 static bool gfx_v9_0_is_idle(void *handle)
3765 {
3766 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3767 
3768 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3769 				GRBM_STATUS, GUI_ACTIVE))
3770 		return false;
3771 	else
3772 		return true;
3773 }
3774 
3775 static int gfx_v9_0_wait_for_idle(void *handle)
3776 {
3777 	unsigned i;
3778 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3779 
3780 	for (i = 0; i < adev->usec_timeout; i++) {
3781 		if (gfx_v9_0_is_idle(handle))
3782 			return 0;
3783 		udelay(1);
3784 	}
3785 	return -ETIMEDOUT;
3786 }
3787 
3788 static int gfx_v9_0_soft_reset(void *handle)
3789 {
3790 	u32 grbm_soft_reset = 0;
3791 	u32 tmp;
3792 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3793 
3794 	/* GRBM_STATUS */
3795 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3796 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3797 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3798 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3799 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3800 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3801 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3802 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3803 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3804 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3805 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3806 	}
3807 
3808 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3809 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3810 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3811 	}
3812 
3813 	/* GRBM_STATUS2 */
3814 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3815 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3816 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3817 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3818 
3819 
3820 	if (grbm_soft_reset) {
3821 		/* stop the rlc */
3822 		adev->gfx.rlc.funcs->stop(adev);
3823 
3824 		if (adev->asic_type != CHIP_ARCTURUS)
3825 			/* Disable GFX parsing/prefetching */
3826 			gfx_v9_0_cp_gfx_enable(adev, false);
3827 
3828 		/* Disable MEC parsing/prefetching */
3829 		gfx_v9_0_cp_compute_enable(adev, false);
3830 
3831 		if (grbm_soft_reset) {
3832 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3833 			tmp |= grbm_soft_reset;
3834 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3835 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3836 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3837 
3838 			udelay(50);
3839 
3840 			tmp &= ~grbm_soft_reset;
3841 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3842 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3843 		}
3844 
3845 		/* Wait a little for things to settle down */
3846 		udelay(50);
3847 	}
3848 	return 0;
3849 }
3850 
3851 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3852 {
3853 	uint64_t clock;
3854 
3855 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3856 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3857 		uint32_t tmp, lsb, msb, i = 0;
3858 		do {
3859 			if (i != 0)
3860 				udelay(1);
3861 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3862 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3863 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3864 			i++;
3865 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3866 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3867 	} else {
3868 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3869 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3870 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3871 	}
3872 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3873 	return clock;
3874 }
3875 
3876 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3877 					  uint32_t vmid,
3878 					  uint32_t gds_base, uint32_t gds_size,
3879 					  uint32_t gws_base, uint32_t gws_size,
3880 					  uint32_t oa_base, uint32_t oa_size)
3881 {
3882 	struct amdgpu_device *adev = ring->adev;
3883 
3884 	/* GDS Base */
3885 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3886 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3887 				   gds_base);
3888 
3889 	/* GDS Size */
3890 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3891 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3892 				   gds_size);
3893 
3894 	/* GWS */
3895 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3896 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3897 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3898 
3899 	/* OA */
3900 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3901 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3902 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3903 }
3904 
3905 static const u32 vgpr_init_compute_shader[] =
3906 {
3907 	0xb07c0000, 0xbe8000ff,
3908 	0x000000f8, 0xbf110800,
3909 	0x7e000280, 0x7e020280,
3910 	0x7e040280, 0x7e060280,
3911 	0x7e080280, 0x7e0a0280,
3912 	0x7e0c0280, 0x7e0e0280,
3913 	0x80808800, 0xbe803200,
3914 	0xbf84fff5, 0xbf9c0000,
3915 	0xd28c0001, 0x0001007f,
3916 	0xd28d0001, 0x0002027e,
3917 	0x10020288, 0xb8810904,
3918 	0xb7814000, 0xd1196a01,
3919 	0x00000301, 0xbe800087,
3920 	0xbefc00c1, 0xd89c4000,
3921 	0x00020201, 0xd89cc080,
3922 	0x00040401, 0x320202ff,
3923 	0x00000800, 0x80808100,
3924 	0xbf84fff8, 0x7e020280,
3925 	0xbf810000, 0x00000000,
3926 };
3927 
3928 static const u32 sgpr_init_compute_shader[] =
3929 {
3930 	0xb07c0000, 0xbe8000ff,
3931 	0x0000005f, 0xbee50080,
3932 	0xbe812c65, 0xbe822c65,
3933 	0xbe832c65, 0xbe842c65,
3934 	0xbe852c65, 0xb77c0005,
3935 	0x80808500, 0xbf84fff8,
3936 	0xbe800080, 0xbf810000,
3937 };
3938 
3939 static const struct soc15_reg_entry vgpr_init_regs[] = {
3940    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3941    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3942    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3943    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3944    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3945    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3946    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3947    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3948    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
3949    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3950 };
3951 
3952 static const struct soc15_reg_entry sgpr_init_regs[] = {
3953    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3954    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3955    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3956    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3957    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3958    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3959    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3960    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3961    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3962    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3963 };
3964 
3965 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3966    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3967    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3968    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3969    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3970    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3971    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3972    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3973    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3974    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3975    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3976    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3977    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3978    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3979    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3980    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3981    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3982    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3983    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3984    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3985    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3986    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
3987    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3988    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3989    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3990    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3991    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3992    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3993    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3994    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3995    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3996    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3997    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3998    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3999 };
4000 
4001 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4002 {
4003 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4004 	int i, r;
4005 
4006 	/* only support when RAS is enabled */
4007 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4008 		return 0;
4009 
4010 	r = amdgpu_ring_alloc(ring, 7);
4011 	if (r) {
4012 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4013 			ring->name, r);
4014 		return r;
4015 	}
4016 
4017 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4018 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4019 
4020 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4021 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4022 				PACKET3_DMA_DATA_DST_SEL(1) |
4023 				PACKET3_DMA_DATA_SRC_SEL(2) |
4024 				PACKET3_DMA_DATA_ENGINE(0)));
4025 	amdgpu_ring_write(ring, 0);
4026 	amdgpu_ring_write(ring, 0);
4027 	amdgpu_ring_write(ring, 0);
4028 	amdgpu_ring_write(ring, 0);
4029 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4030 				adev->gds.gds_size);
4031 
4032 	amdgpu_ring_commit(ring);
4033 
4034 	for (i = 0; i < adev->usec_timeout; i++) {
4035 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4036 			break;
4037 		udelay(1);
4038 	}
4039 
4040 	if (i >= adev->usec_timeout)
4041 		r = -ETIMEDOUT;
4042 
4043 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4044 
4045 	return r;
4046 }
4047 
4048 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4049 {
4050 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4051 	struct amdgpu_ib ib;
4052 	struct dma_fence *f = NULL;
4053 	int r, i, j, k;
4054 	unsigned total_size, vgpr_offset, sgpr_offset;
4055 	u64 gpu_addr;
4056 
4057 	/* only support when RAS is enabled */
4058 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4059 		return 0;
4060 
4061 	/* bail if the compute ring is not ready */
4062 	if (!ring->sched.ready)
4063 		return 0;
4064 
4065 	total_size =
4066 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4067 	total_size +=
4068 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4069 	total_size = ALIGN(total_size, 256);
4070 	vgpr_offset = total_size;
4071 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4072 	sgpr_offset = total_size;
4073 	total_size += sizeof(sgpr_init_compute_shader);
4074 
4075 	/* allocate an indirect buffer to put the commands in */
4076 	memset(&ib, 0, sizeof(ib));
4077 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4078 	if (r) {
4079 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4080 		return r;
4081 	}
4082 
4083 	/* load the compute shaders */
4084 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4085 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4086 
4087 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4088 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4089 
4090 	/* init the ib length to 0 */
4091 	ib.length_dw = 0;
4092 
4093 	/* VGPR */
4094 	/* write the register state for the compute dispatch */
4095 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4096 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4097 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4098 								- PACKET3_SET_SH_REG_START;
4099 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4100 	}
4101 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4102 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4103 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4104 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4105 							- PACKET3_SET_SH_REG_START;
4106 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4107 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4108 
4109 	/* write dispatch packet */
4110 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4111 	ib.ptr[ib.length_dw++] = 128; /* x */
4112 	ib.ptr[ib.length_dw++] = 1; /* y */
4113 	ib.ptr[ib.length_dw++] = 1; /* z */
4114 	ib.ptr[ib.length_dw++] =
4115 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4116 
4117 	/* write CS partial flush packet */
4118 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4119 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4120 
4121 	/* SGPR */
4122 	/* write the register state for the compute dispatch */
4123 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4124 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4125 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4126 								- PACKET3_SET_SH_REG_START;
4127 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4128 	}
4129 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4130 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4131 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4132 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4133 							- PACKET3_SET_SH_REG_START;
4134 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4135 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4136 
4137 	/* write dispatch packet */
4138 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4139 	ib.ptr[ib.length_dw++] = 128; /* x */
4140 	ib.ptr[ib.length_dw++] = 1; /* y */
4141 	ib.ptr[ib.length_dw++] = 1; /* z */
4142 	ib.ptr[ib.length_dw++] =
4143 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4144 
4145 	/* write CS partial flush packet */
4146 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4147 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4148 
4149 	/* shedule the ib on the ring */
4150 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4151 	if (r) {
4152 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4153 		goto fail;
4154 	}
4155 
4156 	/* wait for the GPU to finish processing the IB */
4157 	r = dma_fence_wait(f, false);
4158 	if (r) {
4159 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4160 		goto fail;
4161 	}
4162 
4163 	/* read back registers to clear the counters */
4164 	mutex_lock(&adev->grbm_idx_mutex);
4165 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4166 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4167 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4168 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4169 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4170 			}
4171 		}
4172 	}
4173 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4174 	mutex_unlock(&adev->grbm_idx_mutex);
4175 
4176 fail:
4177 	amdgpu_ib_free(adev, &ib, NULL);
4178 	dma_fence_put(f);
4179 
4180 	return r;
4181 }
4182 
4183 static int gfx_v9_0_early_init(void *handle)
4184 {
4185 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4186 
4187 	if (adev->asic_type == CHIP_ARCTURUS)
4188 		adev->gfx.num_gfx_rings = 0;
4189 	else
4190 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4191 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4192 	gfx_v9_0_set_ring_funcs(adev);
4193 	gfx_v9_0_set_irq_funcs(adev);
4194 	gfx_v9_0_set_gds_init(adev);
4195 	gfx_v9_0_set_rlc_funcs(adev);
4196 
4197 	return 0;
4198 }
4199 
/*
 * Late ECC/RAS setup: GFX RAS late init, then the GDS workaround, then the
 * GPR workaround.  Stops at, and returns, the first non-zero result.
 */
static int gfx_v9_0_ecc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	ret = amdgpu_gfx_ras_late_init(adev);
	if (ret)
		return ret;

	ret = gfx_v9_0_do_edc_gds_workarounds(adev);
	if (ret)
		return ret;

	/* requires IBs so do in late init after IB pool is initialized */
	return gfx_v9_0_do_edc_gpr_workarounds(adev);
}
4220 
4221 static int gfx_v9_0_late_init(void *handle)
4222 {
4223 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4224 	int r;
4225 
4226 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4227 	if (r)
4228 		return r;
4229 
4230 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4231 	if (r)
4232 		return r;
4233 
4234 	r = gfx_v9_0_ecc_late_init(handle);
4235 	if (r)
4236 		return r;
4237 
4238 	return 0;
4239 }
4240 
4241 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4242 {
4243 	uint32_t rlc_setting;
4244 
4245 	/* if RLC is not enabled, do nothing */
4246 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4247 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4248 		return false;
4249 
4250 	return true;
4251 }
4252 
4253 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4254 {
4255 	uint32_t data;
4256 	unsigned i;
4257 
4258 	data = RLC_SAFE_MODE__CMD_MASK;
4259 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4260 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4261 
4262 	/* wait for RLC_SAFE_MODE */
4263 	for (i = 0; i < adev->usec_timeout; i++) {
4264 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4265 			break;
4266 		udelay(1);
4267 	}
4268 }
4269 
4270 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4271 {
4272 	uint32_t data;
4273 
4274 	data = RLC_SAFE_MODE__CMD_MASK;
4275 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4276 }
4277 
4278 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4279 						bool enable)
4280 {
4281 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4282 
4283 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4284 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4285 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4286 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4287 	} else {
4288 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4289 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4290 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4291 	}
4292 
4293 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4294 }
4295 
4296 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4297 						bool enable)
4298 {
4299 	/* TODO: double check if we need to perform under safe mode */
4300 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4301 
4302 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4303 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4304 	else
4305 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4306 
4307 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4308 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4309 	else
4310 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4311 
4312 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4313 }
4314 
4315 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4316 						      bool enable)
4317 {
4318 	uint32_t data, def;
4319 
4320 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4321 
4322 	/* It is disabled by HW by default */
4323 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4324 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4325 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4326 
4327 		if (adev->asic_type != CHIP_VEGA12)
4328 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4329 
4330 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4331 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4332 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4333 
4334 		/* only for Vega10 & Raven1 */
4335 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4336 
4337 		if (def != data)
4338 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4339 
4340 		/* MGLS is a global flag to control all MGLS in GFX */
4341 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4342 			/* 2 - RLC memory Light sleep */
4343 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4344 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4345 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4346 				if (def != data)
4347 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4348 			}
4349 			/* 3 - CP memory Light sleep */
4350 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4351 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4352 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4353 				if (def != data)
4354 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4355 			}
4356 		}
4357 	} else {
4358 		/* 1 - MGCG_OVERRIDE */
4359 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4360 
4361 		if (adev->asic_type != CHIP_VEGA12)
4362 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4363 
4364 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4365 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4366 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4367 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4368 
4369 		if (def != data)
4370 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4371 
4372 		/* 2 - disable MGLS in RLC */
4373 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4374 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4375 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4376 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4377 		}
4378 
4379 		/* 3 - disable MGLS in CP */
4380 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4381 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4382 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4383 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4384 		}
4385 	}
4386 
4387 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4388 }
4389 
4390 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4391 					   bool enable)
4392 {
4393 	uint32_t data, def;
4394 
4395 	if (adev->asic_type == CHIP_ARCTURUS)
4396 		return;
4397 
4398 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4399 
4400 	/* Enable 3D CGCG/CGLS */
4401 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4402 		/* write cmd to clear cgcg/cgls ov */
4403 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4404 		/* unset CGCG override */
4405 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4406 		/* update CGCG and CGLS override bits */
4407 		if (def != data)
4408 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4409 
4410 		/* enable 3Dcgcg FSM(0x0000363f) */
4411 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4412 
4413 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4414 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4415 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4416 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4417 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4418 		if (def != data)
4419 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4420 
4421 		/* set IDLE_POLL_COUNT(0x00900100) */
4422 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4423 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4424 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4425 		if (def != data)
4426 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4427 	} else {
4428 		/* Disable CGCG/CGLS */
4429 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4430 		/* disable cgcg, cgls should be disabled */
4431 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4432 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4433 		/* disable cgcg and cgls in FSM */
4434 		if (def != data)
4435 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4436 	}
4437 
4438 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4439 }
4440 
4441 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4442 						      bool enable)
4443 {
4444 	uint32_t def, data;
4445 
4446 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4447 
4448 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4449 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4450 		/* unset CGCG override */
4451 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4452 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4453 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4454 		else
4455 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4456 		/* update CGCG and CGLS override bits */
4457 		if (def != data)
4458 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4459 
4460 		/* enable cgcg FSM(0x0000363F) */
4461 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4462 
4463 		if (adev->asic_type == CHIP_ARCTURUS)
4464 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4465 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4466 		else
4467 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4468 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4469 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4470 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4471 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4472 		if (def != data)
4473 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4474 
4475 		/* set IDLE_POLL_COUNT(0x00900100) */
4476 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4477 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4478 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4479 		if (def != data)
4480 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4481 	} else {
4482 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4483 		/* reset CGCG/CGLS bits */
4484 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4485 		/* disable cgcg and cgls in FSM */
4486 		if (def != data)
4487 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4488 	}
4489 
4490 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4491 }
4492 
4493 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4494 					    bool enable)
4495 {
4496 	if (enable) {
4497 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4498 		 * ===  MGCG + MGLS ===
4499 		 */
4500 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4501 		/* ===  CGCG /CGLS for GFX 3D Only === */
4502 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4503 		/* ===  CGCG + CGLS === */
4504 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4505 	} else {
4506 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4507 		 * ===  CGCG + CGLS ===
4508 		 */
4509 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4510 		/* ===  CGCG /CGLS for GFX 3D Only === */
4511 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4512 		/* ===  MGCG + MGLS === */
4513 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4514 	}
4515 	return 0;
4516 }
4517 
4518 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4519 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4520 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4521 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4522 	.init = gfx_v9_0_rlc_init,
4523 	.get_csb_size = gfx_v9_0_get_csb_size,
4524 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4525 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4526 	.resume = gfx_v9_0_rlc_resume,
4527 	.stop = gfx_v9_0_rlc_stop,
4528 	.reset = gfx_v9_0_rlc_reset,
4529 	.start = gfx_v9_0_rlc_start
4530 };
4531 
4532 static int gfx_v9_0_set_powergating_state(void *handle,
4533 					  enum amd_powergating_state state)
4534 {
4535 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4536 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4537 
4538 	switch (adev->asic_type) {
4539 	case CHIP_RAVEN:
4540 	case CHIP_RENOIR:
4541 		if (!enable) {
4542 			amdgpu_gfx_off_ctrl(adev, false);
4543 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4544 		}
4545 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4546 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4547 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4548 		} else {
4549 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4550 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4551 		}
4552 
4553 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4554 			gfx_v9_0_enable_cp_power_gating(adev, true);
4555 		else
4556 			gfx_v9_0_enable_cp_power_gating(adev, false);
4557 
4558 		/* update gfx cgpg state */
4559 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4560 
4561 		/* update mgcg state */
4562 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4563 
4564 		if (enable)
4565 			amdgpu_gfx_off_ctrl(adev, true);
4566 		break;
4567 	case CHIP_VEGA12:
4568 		if (!enable) {
4569 			amdgpu_gfx_off_ctrl(adev, false);
4570 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4571 		} else {
4572 			amdgpu_gfx_off_ctrl(adev, true);
4573 		}
4574 		break;
4575 	default:
4576 		break;
4577 	}
4578 
4579 	return 0;
4580 }
4581 
4582 static int gfx_v9_0_set_clockgating_state(void *handle,
4583 					  enum amd_clockgating_state state)
4584 {
4585 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4586 
4587 	if (amdgpu_sriov_vf(adev))
4588 		return 0;
4589 
4590 	switch (adev->asic_type) {
4591 	case CHIP_VEGA10:
4592 	case CHIP_VEGA12:
4593 	case CHIP_VEGA20:
4594 	case CHIP_RAVEN:
4595 	case CHIP_ARCTURUS:
4596 	case CHIP_RENOIR:
4597 		gfx_v9_0_update_gfx_clock_gating(adev,
4598 						 state == AMD_CG_STATE_GATE ? true : false);
4599 		break;
4600 	default:
4601 		break;
4602 	}
4603 	return 0;
4604 }
4605 
4606 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4607 {
4608 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4609 	int data;
4610 
4611 	if (amdgpu_sriov_vf(adev))
4612 		*flags = 0;
4613 
4614 	/* AMD_CG_SUPPORT_GFX_MGCG */
4615 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4616 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4617 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4618 
4619 	/* AMD_CG_SUPPORT_GFX_CGCG */
4620 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4621 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4622 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4623 
4624 	/* AMD_CG_SUPPORT_GFX_CGLS */
4625 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4626 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4627 
4628 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4629 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4630 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4631 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4632 
4633 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4634 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4635 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4636 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4637 
4638 	if (adev->asic_type != CHIP_ARCTURUS) {
4639 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4640 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4641 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4642 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4643 
4644 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4645 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4646 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4647 	}
4648 }
4649 
4650 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4651 {
4652 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4653 }
4654 
4655 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4656 {
4657 	struct amdgpu_device *adev = ring->adev;
4658 	u64 wptr;
4659 
4660 	/* XXX check if swapping is necessary on BE */
4661 	if (ring->use_doorbell) {
4662 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4663 	} else {
4664 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4665 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4666 	}
4667 
4668 	return wptr;
4669 }
4670 
4671 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4672 {
4673 	struct amdgpu_device *adev = ring->adev;
4674 
4675 	if (ring->use_doorbell) {
4676 		/* XXX check if swapping is necessary on BE */
4677 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4678 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4679 	} else {
4680 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4681 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4682 	}
4683 }
4684 
4685 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4686 {
4687 	struct amdgpu_device *adev = ring->adev;
4688 	u32 ref_and_mask, reg_mem_engine;
4689 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4690 
4691 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4692 		switch (ring->me) {
4693 		case 1:
4694 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4695 			break;
4696 		case 2:
4697 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4698 			break;
4699 		default:
4700 			return;
4701 		}
4702 		reg_mem_engine = 0;
4703 	} else {
4704 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4705 		reg_mem_engine = 1; /* pfp */
4706 	}
4707 
4708 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4709 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4710 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4711 			      ref_and_mask, ref_and_mask, 0x20);
4712 }
4713 
4714 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4715 					struct amdgpu_job *job,
4716 					struct amdgpu_ib *ib,
4717 					uint32_t flags)
4718 {
4719 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4720 	u32 header, control = 0;
4721 
4722 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4723 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4724 	else
4725 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4726 
4727 	control |= ib->length_dw | (vmid << 24);
4728 
4729 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4730 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4731 
4732 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4733 			gfx_v9_0_ring_emit_de_meta(ring);
4734 	}
4735 
4736 	amdgpu_ring_write(ring, header);
4737 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4738 	amdgpu_ring_write(ring,
4739 #ifdef __BIG_ENDIAN
4740 		(2 << 0) |
4741 #endif
4742 		lower_32_bits(ib->gpu_addr));
4743 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4744 	amdgpu_ring_write(ring, control);
4745 }
4746 
4747 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4748 					  struct amdgpu_job *job,
4749 					  struct amdgpu_ib *ib,
4750 					  uint32_t flags)
4751 {
4752 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4753 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4754 
4755 	/* Currently, there is a high possibility to get wave ID mismatch
4756 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4757 	 * different wave IDs than the GDS expects. This situation happens
4758 	 * randomly when at least 5 compute pipes use GDS ordered append.
4759 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4760 	 * Those are probably bugs somewhere else in the kernel driver.
4761 	 *
4762 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4763 	 * GDS to 0 for this ring (me/pipe).
4764 	 */
4765 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4766 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4767 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4768 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4769 	}
4770 
4771 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4772 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4773 	amdgpu_ring_write(ring,
4774 #ifdef __BIG_ENDIAN
4775 				(2 << 0) |
4776 #endif
4777 				lower_32_bits(ib->gpu_addr));
4778 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4779 	amdgpu_ring_write(ring, control);
4780 }
4781 
4782 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4783 				     u64 seq, unsigned flags)
4784 {
4785 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4786 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4787 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4788 
4789 	/* RELEASE_MEM - flush caches, send int */
4790 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4791 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4792 					       EOP_TC_NC_ACTION_EN) :
4793 					      (EOP_TCL1_ACTION_EN |
4794 					       EOP_TC_ACTION_EN |
4795 					       EOP_TC_WB_ACTION_EN |
4796 					       EOP_TC_MD_ACTION_EN)) |
4797 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4798 				 EVENT_INDEX(5)));
4799 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4800 
4801 	/*
4802 	 * the address should be Qword aligned if 64bit write, Dword
4803 	 * aligned if only send 32bit data low (discard data high)
4804 	 */
4805 	if (write64bit)
4806 		BUG_ON(addr & 0x7);
4807 	else
4808 		BUG_ON(addr & 0x3);
4809 	amdgpu_ring_write(ring, lower_32_bits(addr));
4810 	amdgpu_ring_write(ring, upper_32_bits(addr));
4811 	amdgpu_ring_write(ring, lower_32_bits(seq));
4812 	amdgpu_ring_write(ring, upper_32_bits(seq));
4813 	amdgpu_ring_write(ring, 0);
4814 }
4815 
4816 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4817 {
4818 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4819 	uint32_t seq = ring->fence_drv.sync_seq;
4820 	uint64_t addr = ring->fence_drv.gpu_addr;
4821 
4822 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4823 			      lower_32_bits(addr), upper_32_bits(addr),
4824 			      seq, 0xffffffff, 4);
4825 }
4826 
4827 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4828 					unsigned vmid, uint64_t pd_addr)
4829 {
4830 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4831 
4832 	/* compute doesn't have PFP */
4833 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4834 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4835 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4836 		amdgpu_ring_write(ring, 0x0);
4837 	}
4838 }
4839 
4840 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4841 {
4842 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4843 }
4844 
4845 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4846 {
4847 	u64 wptr;
4848 
4849 	/* XXX check if swapping is necessary on BE */
4850 	if (ring->use_doorbell)
4851 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4852 	else
4853 		BUG();
4854 	return wptr;
4855 }
4856 
4857 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4858 					   bool acquire)
4859 {
4860 	struct amdgpu_device *adev = ring->adev;
4861 	int pipe_num, tmp, reg;
4862 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4863 
4864 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4865 
4866 	/* first me only has 2 entries, GFX and HP3D */
4867 	if (ring->me > 0)
4868 		pipe_num -= 2;
4869 
4870 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4871 	tmp = RREG32(reg);
4872 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4873 	WREG32(reg, tmp);
4874 }
4875 
4876 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4877 					    struct amdgpu_ring *ring,
4878 					    bool acquire)
4879 {
4880 	int i, pipe;
4881 	bool reserve;
4882 	struct amdgpu_ring *iring;
4883 
4884 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4885 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4886 	if (acquire)
4887 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4888 	else
4889 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4890 
4891 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4892 		/* Clear all reservations - everyone reacquires all resources */
4893 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4894 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4895 						       true);
4896 
4897 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4898 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4899 						       true);
4900 	} else {
4901 		/* Lower all pipes without a current reservation */
4902 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4903 			iring = &adev->gfx.gfx_ring[i];
4904 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4905 							   iring->me,
4906 							   iring->pipe,
4907 							   0);
4908 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4909 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4910 		}
4911 
4912 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4913 			iring = &adev->gfx.compute_ring[i];
4914 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4915 							   iring->me,
4916 							   iring->pipe,
4917 							   0);
4918 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4919 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4920 		}
4921 	}
4922 
4923 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4924 }
4925 
4926 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4927 				      struct amdgpu_ring *ring,
4928 				      bool acquire)
4929 {
4930 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4931 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4932 
4933 	mutex_lock(&adev->srbm_mutex);
4934 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4935 
4936 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4937 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4938 
4939 	soc15_grbm_select(adev, 0, 0, 0, 0);
4940 	mutex_unlock(&adev->srbm_mutex);
4941 }
4942 
4943 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4944 					       enum drm_sched_priority priority)
4945 {
4946 	struct amdgpu_device *adev = ring->adev;
4947 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4948 
4949 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4950 		return;
4951 
4952 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4953 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4954 }
4955 
4956 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4957 {
4958 	struct amdgpu_device *adev = ring->adev;
4959 
4960 	/* XXX check if swapping is necessary on BE */
4961 	if (ring->use_doorbell) {
4962 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4963 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4964 	} else{
4965 		BUG(); /* only DOORBELL method supported on gfx9 now */
4966 	}
4967 }
4968 
4969 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4970 					 u64 seq, unsigned int flags)
4971 {
4972 	struct amdgpu_device *adev = ring->adev;
4973 
4974 	/* we only allocate 32bit for each seq wb address */
4975 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4976 
4977 	/* write fence seq to the "addr" */
4978 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4979 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4980 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4981 	amdgpu_ring_write(ring, lower_32_bits(addr));
4982 	amdgpu_ring_write(ring, upper_32_bits(addr));
4983 	amdgpu_ring_write(ring, lower_32_bits(seq));
4984 
4985 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4986 		/* set register to trigger INT */
4987 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4988 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4989 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4990 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4991 		amdgpu_ring_write(ring, 0);
4992 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4993 	}
4994 }
4995 
4996 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4997 {
4998 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4999 	amdgpu_ring_write(ring, 0);
5000 }
5001 
5002 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5003 {
5004 	struct v9_ce_ib_state ce_payload = {0};
5005 	uint64_t csa_addr;
5006 	int cnt;
5007 
5008 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5009 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5010 
5011 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5012 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5013 				 WRITE_DATA_DST_SEL(8) |
5014 				 WR_CONFIRM) |
5015 				 WRITE_DATA_CACHE_POLICY(0));
5016 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5017 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5018 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5019 }
5020 
5021 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5022 {
5023 	struct v9_de_ib_state de_payload = {0};
5024 	uint64_t csa_addr, gds_addr;
5025 	int cnt;
5026 
5027 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5028 	gds_addr = csa_addr + 4096;
5029 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5030 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5031 
5032 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5033 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5034 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5035 				 WRITE_DATA_DST_SEL(8) |
5036 				 WR_CONFIRM) |
5037 				 WRITE_DATA_CACHE_POLICY(0));
5038 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5039 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5040 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5041 }
5042 
5043 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5044 {
5045 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5046 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5047 }
5048 
5049 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5050 {
5051 	uint32_t dw2 = 0;
5052 
5053 	if (amdgpu_sriov_vf(ring->adev))
5054 		gfx_v9_0_ring_emit_ce_meta(ring);
5055 
5056 	gfx_v9_0_ring_emit_tmz(ring, true);
5057 
5058 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5059 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5060 		/* set load_global_config & load_global_uconfig */
5061 		dw2 |= 0x8001;
5062 		/* set load_cs_sh_regs */
5063 		dw2 |= 0x01000000;
5064 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5065 		dw2 |= 0x10002;
5066 
5067 		/* set load_ce_ram if preamble presented */
5068 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5069 			dw2 |= 0x10000000;
5070 	} else {
5071 		/* still load_ce_ram if this is the first time preamble presented
5072 		 * although there is no context switch happens.
5073 		 */
5074 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5075 			dw2 |= 0x10000000;
5076 	}
5077 
5078 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5079 	amdgpu_ring_write(ring, dw2);
5080 	amdgpu_ring_write(ring, 0);
5081 }
5082 
5083 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5084 {
5085 	unsigned ret;
5086 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5087 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5088 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5089 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5090 	ret = ring->wptr & ring->buf_mask;
5091 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5092 	return ret;
5093 }
5094 
5095 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5096 {
5097 	unsigned cur;
5098 	BUG_ON(offset > ring->buf_mask);
5099 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5100 
5101 	cur = (ring->wptr & ring->buf_mask) - 1;
5102 	if (likely(cur > offset))
5103 		ring->ring[offset] = cur - offset;
5104 	else
5105 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5106 }
5107 
5108 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5109 {
5110 	struct amdgpu_device *adev = ring->adev;
5111 
5112 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5113 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5114 				(5 << 8) |	/* dst: memory */
5115 				(1 << 20));	/* write confirm */
5116 	amdgpu_ring_write(ring, reg);
5117 	amdgpu_ring_write(ring, 0);
5118 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5119 				adev->virt.reg_val_offs * 4));
5120 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5121 				adev->virt.reg_val_offs * 4));
5122 }
5123 
5124 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5125 				    uint32_t val)
5126 {
5127 	uint32_t cmd = 0;
5128 
5129 	switch (ring->funcs->type) {
5130 	case AMDGPU_RING_TYPE_GFX:
5131 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5132 		break;
5133 	case AMDGPU_RING_TYPE_KIQ:
5134 		cmd = (1 << 16); /* no inc addr */
5135 		break;
5136 	default:
5137 		cmd = WR_CONFIRM;
5138 		break;
5139 	}
5140 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5141 	amdgpu_ring_write(ring, cmd);
5142 	amdgpu_ring_write(ring, reg);
5143 	amdgpu_ring_write(ring, 0);
5144 	amdgpu_ring_write(ring, val);
5145 }
5146 
/*
 * Emit a wait on register @reg for @val under @mask.
 *
 * Thin wrapper around gfx_v9_0_wait_reg_mem(); apart from @reg, @val and
 * @mask every argument is a fixed constant (0, 0, 0, second address 0,
 * trailing 0x20).
 * NOTE(review): the meaning of those constants is defined by
 * gfx_v9_0_wait_reg_mem(), which is not visible here.
 */
static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0,
			      reg, 0,
			      val, mask, 0x20);
}
5152 
5153 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5154 						  uint32_t reg0, uint32_t reg1,
5155 						  uint32_t ref, uint32_t mask)
5156 {
5157 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5158 	struct amdgpu_device *adev = ring->adev;
5159 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5160 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5161 
5162 	if (fw_version_ok)
5163 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5164 				      ref, mask, 0x20);
5165 	else
5166 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5167 							   ref, mask);
5168 }
5169 
5170 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5171 {
5172 	struct amdgpu_device *adev = ring->adev;
5173 	uint32_t value = 0;
5174 
5175 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5176 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5177 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5178 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5179 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5180 }
5181 
5182 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5183 						 enum amdgpu_interrupt_state state)
5184 {
5185 	switch (state) {
5186 	case AMDGPU_IRQ_STATE_DISABLE:
5187 	case AMDGPU_IRQ_STATE_ENABLE:
5188 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5189 			       TIME_STAMP_INT_ENABLE,
5190 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5191 		break;
5192 	default:
5193 		break;
5194 	}
5195 }
5196 
5197 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5198 						     int me, int pipe,
5199 						     enum amdgpu_interrupt_state state)
5200 {
5201 	u32 mec_int_cntl, mec_int_cntl_reg;
5202 
5203 	/*
5204 	 * amdgpu controls only the first MEC. That's why this function only
5205 	 * handles the setting of interrupts for this specific MEC. All other
5206 	 * pipes' interrupts are set by amdkfd.
5207 	 */
5208 
5209 	if (me == 1) {
5210 		switch (pipe) {
5211 		case 0:
5212 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5213 			break;
5214 		case 1:
5215 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5216 			break;
5217 		case 2:
5218 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5219 			break;
5220 		case 3:
5221 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5222 			break;
5223 		default:
5224 			DRM_DEBUG("invalid pipe %d\n", pipe);
5225 			return;
5226 		}
5227 	} else {
5228 		DRM_DEBUG("invalid me %d\n", me);
5229 		return;
5230 	}
5231 
5232 	switch (state) {
5233 	case AMDGPU_IRQ_STATE_DISABLE:
5234 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5235 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5236 					     TIME_STAMP_INT_ENABLE, 0);
5237 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5238 		break;
5239 	case AMDGPU_IRQ_STATE_ENABLE:
5240 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5241 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5242 					     TIME_STAMP_INT_ENABLE, 1);
5243 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5244 		break;
5245 	default:
5246 		break;
5247 	}
5248 }
5249 
5250 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5251 					     struct amdgpu_irq_src *source,
5252 					     unsigned type,
5253 					     enum amdgpu_interrupt_state state)
5254 {
5255 	switch (state) {
5256 	case AMDGPU_IRQ_STATE_DISABLE:
5257 	case AMDGPU_IRQ_STATE_ENABLE:
5258 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5259 			       PRIV_REG_INT_ENABLE,
5260 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5261 		break;
5262 	default:
5263 		break;
5264 	}
5265 
5266 	return 0;
5267 }
5268 
5269 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5270 					      struct amdgpu_irq_src *source,
5271 					      unsigned type,
5272 					      enum amdgpu_interrupt_state state)
5273 {
5274 	switch (state) {
5275 	case AMDGPU_IRQ_STATE_DISABLE:
5276 	case AMDGPU_IRQ_STATE_ENABLE:
5277 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5278 			       PRIV_INSTR_INT_ENABLE,
5279 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5280 	default:
5281 		break;
5282 	}
5283 
5284 	return 0;
5285 }
5286 
5287 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5288 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5289 			CP_ECC_ERROR_INT_ENABLE, 1)
5290 
5291 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5292 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5293 			CP_ECC_ERROR_INT_ENABLE, 0)
5294 
5295 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5296 					      struct amdgpu_irq_src *source,
5297 					      unsigned type,
5298 					      enum amdgpu_interrupt_state state)
5299 {
5300 	switch (state) {
5301 	case AMDGPU_IRQ_STATE_DISABLE:
5302 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5303 				CP_ECC_ERROR_INT_ENABLE, 0);
5304 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5305 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5306 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5307 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5308 		break;
5309 
5310 	case AMDGPU_IRQ_STATE_ENABLE:
5311 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5312 				CP_ECC_ERROR_INT_ENABLE, 1);
5313 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5314 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5315 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5316 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5317 		break;
5318 	default:
5319 		break;
5320 	}
5321 
5322 	return 0;
5323 }
5324 
5325 
5326 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5327 					    struct amdgpu_irq_src *src,
5328 					    unsigned type,
5329 					    enum amdgpu_interrupt_state state)
5330 {
5331 	switch (type) {
5332 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5333 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5334 		break;
5335 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5336 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5337 		break;
5338 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5339 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5340 		break;
5341 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5342 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5343 		break;
5344 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5345 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5346 		break;
5347 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5348 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5349 		break;
5350 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5351 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5352 		break;
5353 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5354 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5355 		break;
5356 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5357 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5358 		break;
5359 	default:
5360 		break;
5361 	}
5362 	return 0;
5363 }
5364 
5365 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5366 			    struct amdgpu_irq_src *source,
5367 			    struct amdgpu_iv_entry *entry)
5368 {
5369 	int i;
5370 	u8 me_id, pipe_id, queue_id;
5371 	struct amdgpu_ring *ring;
5372 
5373 	DRM_DEBUG("IH: CP EOP\n");
5374 	me_id = (entry->ring_id & 0x0c) >> 2;
5375 	pipe_id = (entry->ring_id & 0x03) >> 0;
5376 	queue_id = (entry->ring_id & 0x70) >> 4;
5377 
5378 	switch (me_id) {
5379 	case 0:
5380 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5381 		break;
5382 	case 1:
5383 	case 2:
5384 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5385 			ring = &adev->gfx.compute_ring[i];
5386 			/* Per-queue interrupt is supported for MEC starting from VI.
5387 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5388 			  */
5389 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5390 				amdgpu_fence_process(ring);
5391 		}
5392 		break;
5393 	}
5394 	return 0;
5395 }
5396 
5397 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5398 			   struct amdgpu_iv_entry *entry)
5399 {
5400 	u8 me_id, pipe_id, queue_id;
5401 	struct amdgpu_ring *ring;
5402 	int i;
5403 
5404 	me_id = (entry->ring_id & 0x0c) >> 2;
5405 	pipe_id = (entry->ring_id & 0x03) >> 0;
5406 	queue_id = (entry->ring_id & 0x70) >> 4;
5407 
5408 	switch (me_id) {
5409 	case 0:
5410 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5411 		break;
5412 	case 1:
5413 	case 2:
5414 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5415 			ring = &adev->gfx.compute_ring[i];
5416 			if (ring->me == me_id && ring->pipe == pipe_id &&
5417 			    ring->queue == queue_id)
5418 				drm_sched_fault(&ring->sched);
5419 		}
5420 		break;
5421 	}
5422 }
5423 
/*
 * Interrupt handler for privileged register faults: log the error and
 * pass the IV entry to gfx_v9_0_fault().  @source is not used.
 *
 * Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5432 
/*
 * Interrupt handler for privileged instruction faults: log the error
 * and pass the IV entry to gfx_v9_0_fault().  @source is not used.
 *
 * Always returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
5441 
5442 
5443 static const struct ras_gfx_subblock_reg ras_subblock_regs[] = {
5444 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5445 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5446 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5447 	},
5448 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5449 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5450 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5451 	},
5452 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5453 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5454 	  0, 0
5455 	},
5456 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5457 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5458 	  0, 0
5459 	},
5460 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5461 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5462 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5463 	},
5464 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5465 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5466 	  0, 0
5467 	},
5468 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5469 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5470 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5471 	},
5472 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5473 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5474 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5475 	},
5476 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5477 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5478 	  0, 0
5479 	},
5480 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5481 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5482 	  0, 0
5483 	},
5484 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5485 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5486 	  0, 0
5487 	},
5488 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5489 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5490 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5491 	},
5492 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5493 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5494 	  0, 0
5495 	},
5496 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5497 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5498 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5499 	},
5500 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5501 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5502 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5503 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5504 	},
5505 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5506 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5507 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5508 	  0, 0
5509 	},
5510 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5511 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5512 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5513 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5514 	},
5515 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5516 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5517 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5518 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5519 	},
5520 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5521 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5522 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5523 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5524 	},
5525 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5526 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5527 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5528 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5529 	},
5530 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5531 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5532 	  0, 0
5533 	},
5534 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5535 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5536 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5537 	},
5538 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5539 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5540 	  0, 0
5541 	},
5542 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5543 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5544 	  0, 0
5545 	},
5546 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5547 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5548 	  0, 0
5549 	},
5550 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5551 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5552 	  0, 0
5553 	},
5554 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5555 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5556 	  0, 0
5557 	},
5558 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5559 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5560 	  0, 0
5561 	},
5562 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5563 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5564 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5565 	},
5566 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5567 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5568 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5569 	},
5570 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5571 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5572 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5573 	},
5574 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5575 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5576 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5577 	},
5578 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5579 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5580 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5581 	},
5582 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5583 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5584 	  0, 0
5585 	},
5586 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5587 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5588 	  0, 0
5589 	},
5590 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5591 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5592 	  0, 0
5593 	},
5594 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5595 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5596 	  0, 0
5597 	},
5598 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5599 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5600 	  0, 0
5601 	},
5602 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5603 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5604 	  0, 0
5605 	},
5606 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5607 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5608 	  0, 0
5609 	},
5610 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5611 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5612 	  0, 0
5613 	},
5614 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5615 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5616 	  0, 0
5617 	},
5618 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5619 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5620 	  0, 0
5621 	},
5622 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5623 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5624 	  0, 0
5625 	},
5626 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5627 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5628 	  0, 0
5629 	},
5630 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5631 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5632 	  0, 0
5633 	},
5634 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5635 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5636 	  0, 0
5637 	},
5638 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5639 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5640 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5641 	},
5642 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5643 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5644 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5645 	},
5646 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5647 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5648 	  0, 0
5649 	},
5650 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5651 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5652 	  0, 0
5653 	},
5654 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5655 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5656 	  0, 0
5657 	},
5658 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5659 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5660 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5661 	},
5662 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5663 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5664 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5665 	},
5666 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5667 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5668 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5669 	},
5670 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5671 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5672 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5673 	},
5674 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5675 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5676 	  0, 0
5677 	},
5678 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5679 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5680 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5681 	},
5682 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5683 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5684 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5685 	},
5686 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5687 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5688 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5689 	},
5690 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5691 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5692 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5693 	},
5694 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5695 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5696 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5697 	},
5698 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5699 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5700 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5701 	},
5702 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5703 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5704 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5705 	},
5706 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5707 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5708 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5709 	},
5710 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5711 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5712 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5713 	},
5714 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5715 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5716 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5717 	},
5718 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5719 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5720 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5721 	},
5722 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5723 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5724 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5725 	},
5726 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5727 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5728 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5729 	},
5730 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5731 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5732 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5733 	},
5734 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5735 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5736 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5737 	},
5738 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5739 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5740 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5741 	},
5742 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5743 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5744 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5745 	},
5746 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5747 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5748 	  0, 0
5749 	},
5750 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5751 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5752 	  0, 0
5753 	},
5754 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5755 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5756 	  0, 0
5757 	},
5758 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5759 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5760 	  0, 0
5761 	},
5762 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5763 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5764 	  0, 0
5765 	},
5766 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5767 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5768 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5769 	},
5770 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5771 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5772 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5773 	},
5774 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5775 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5776 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5777 	},
5778 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5779 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5780 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5781 	},
5782 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5783 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5784 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5785 	},
5786 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5787 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5788 	  0, 0
5789 	},
5790 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5791 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5792 	  0, 0
5793 	},
5794 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5795 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5796 	  0, 0
5797 	},
5798 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5799 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5800 	  0, 0
5801 	},
5802 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5803 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5804 	  0, 0
5805 	},
5806 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5807 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5808 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5809 	},
5810 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5811 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5812 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5813 	},
5814 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5815 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5816 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5817 	},
5818 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5819 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5820 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5821 	},
5822 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5823 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5824 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5825 	},
5826 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5827 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5828 	  0, 0
5829 	},
5830 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5831 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5832 	  0, 0
5833 	},
5834 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5835 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5836 	  0, 0
5837 	},
5838 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5839 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5840 	  0, 0
5841 	},
5842 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5843 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5844 	  0, 0
5845 	},
5846 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5847 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5848 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5849 	},
5850 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5851 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5852 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5853 	},
5854 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5855 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5856 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5857 	},
5858 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5859 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5860 	  0, 0
5861 	},
5862 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5863 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5864 	  0, 0
5865 	},
5866 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5867 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5868 	  0, 0
5869 	},
5870 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5871 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5872 	  0, 0
5873 	},
5874 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5875 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5876 	  0, 0
5877 	},
5878 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5879 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5880 	  0, 0
5881 	}
5882 };
5883 
5884 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5885 				     void *inject_if)
5886 {
5887 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5888 	int ret;
5889 	struct ta_ras_trigger_error_input block_info = { 0 };
5890 
5891 	if (adev->asic_type != CHIP_VEGA20)
5892 		return -EINVAL;
5893 
5894 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5895 		return -EINVAL;
5896 
5897 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5898 		return -EPERM;
5899 
5900 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5901 	      info->head.type)) {
5902 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
5903 			ras_gfx_subblocks[info->head.sub_block_index].name,
5904 			info->head.type);
5905 		return -EPERM;
5906 	}
5907 
5908 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5909 	      info->head.type)) {
5910 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
5911 			ras_gfx_subblocks[info->head.sub_block_index].name,
5912 			info->head.type);
5913 		return -EPERM;
5914 	}
5915 
5916 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5917 	block_info.sub_block_index =
5918 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5919 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5920 	block_info.address = info->address;
5921 	block_info.value = info->value;
5922 
5923 	mutex_lock(&adev->grbm_idx_mutex);
5924 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
5925 	mutex_unlock(&adev->grbm_idx_mutex);
5926 
5927 	return ret;
5928 }
5929 
/*
 * Names of the VM L2 memories, indexed by the value written to
 * mmVM_L2_MEM_ECC_INDEX in gfx_v9_0_query_utc_edc_status().
 * Both the strings and the pointer table itself are read-only.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
5948 
/*
 * Names of the VM L2 walker memories, indexed by the value written to
 * mmVM_L2_WALKER_MEM_ECC_INDEX in gfx_v9_0_query_utc_edc_status().
 * Both the strings and the pointer table itself are read-only.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
5958 
/*
 * Names of the ATC L2 2M cache memories, indexed by the value written to
 * mmATC_L2_CACHE_2M_EDC_INDEX in gfx_v9_0_query_utc_edc_status().
 * Both the strings and the pointer table itself are read-only.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
5965 
/*
 * Names of the ATC L2 4K cache memories, indexed by the value written to
 * mmATC_L2_CACHE_4K_EDC_INDEX in gfx_v9_0_query_utc_edc_status().
 * Both the strings and the pointer table itself are read-only.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6000 
6001 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6002 					 struct ras_err_data *err_data)
6003 {
6004 	uint32_t i, data;
6005 	uint32_t sec_count, ded_count;
6006 
6007 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6008 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6009 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6010 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6011 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6012 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6013 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6014 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6015 
6016 	for (i = 0; i < 16; i++) {
6017 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6018 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6019 
6020 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6021 		if (sec_count) {
6022 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6023 				 vml2_mems[i], sec_count);
6024 			err_data->ce_count += sec_count;
6025 		}
6026 
6027 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6028 		if (ded_count) {
6029 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6030 				 vml2_mems[i], ded_count);
6031 			err_data->ue_count += ded_count;
6032 		}
6033 	}
6034 
6035 	for (i = 0; i < 7; i++) {
6036 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6037 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6038 
6039 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6040 						SEC_COUNT);
6041 		if (sec_count) {
6042 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6043 				 vml2_walker_mems[i], sec_count);
6044 			err_data->ce_count += sec_count;
6045 		}
6046 
6047 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6048 						DED_COUNT);
6049 		if (ded_count) {
6050 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6051 				 vml2_walker_mems[i], ded_count);
6052 			err_data->ue_count += ded_count;
6053 		}
6054 	}
6055 
6056 	for (i = 0; i < 4; i++) {
6057 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6058 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6059 
6060 		sec_count = (data & 0x00006000L) >> 0xd;
6061 		if (sec_count) {
6062 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6063 				 atc_l2_cache_2m_mems[i], sec_count);
6064 			err_data->ce_count += sec_count;
6065 		}
6066 	}
6067 
6068 	for (i = 0; i < 32; i++) {
6069 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6070 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6071 
6072 		sec_count = (data & 0x00006000L) >> 0xd;
6073 		if (sec_count) {
6074 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6075 				 atc_l2_cache_4k_mems[i], sec_count);
6076 			err_data->ce_count += sec_count;
6077 		}
6078 
6079 		ded_count = (data & 0x00018000L) >> 0xf;
6080 		if (ded_count) {
6081 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6082 				 atc_l2_cache_4k_mems[i], ded_count);
6083 			err_data->ue_count += ded_count;
6084 		}
6085 	}
6086 
6087 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6088 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6089 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6090 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6091 
6092 	return 0;
6093 }
6094 
6095 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6096 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6097 	uint32_t *sec_count, uint32_t *ded_count)
6098 {
6099 	uint32_t i;
6100 	uint32_t sec_cnt, ded_cnt;
6101 
6102 	for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) {
6103 		if(ras_subblock_regs[i].reg_offset != reg->reg_offset ||
6104 			ras_subblock_regs[i].seg != reg->seg ||
6105 			ras_subblock_regs[i].inst != reg->inst)
6106 			continue;
6107 
6108 		sec_cnt = (value &
6109 				ras_subblock_regs[i].sec_count_mask) >>
6110 				ras_subblock_regs[i].sec_count_shift;
6111 		if (sec_cnt) {
6112 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6113 				ras_subblock_regs[i].name,
6114 				se_id, inst_id,
6115 				sec_cnt);
6116 			*sec_count += sec_cnt;
6117 		}
6118 
6119 		ded_cnt = (value &
6120 				ras_subblock_regs[i].ded_count_mask) >>
6121 				ras_subblock_regs[i].ded_count_shift;
6122 		if (ded_cnt) {
6123 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6124 				ras_subblock_regs[i].name,
6125 				se_id, inst_id,
6126 				ded_cnt);
6127 			*ded_count += ded_cnt;
6128 		}
6129 	}
6130 
6131 	return 0;
6132 }
6133 
6134 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6135 					  void *ras_error_status)
6136 {
6137 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6138 	uint32_t sec_count = 0, ded_count = 0;
6139 	uint32_t i, j, k;
6140 	uint32_t reg_value;
6141 
6142 	if (adev->asic_type != CHIP_VEGA20)
6143 		return -EINVAL;
6144 
6145 	err_data->ue_count = 0;
6146 	err_data->ce_count = 0;
6147 
6148 	mutex_lock(&adev->grbm_idx_mutex);
6149 
6150 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6151 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6152 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6153 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6154 				reg_value =
6155 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6156 				if (reg_value)
6157 					__get_ras_error_count(&sec_ded_counter_registers[i],
6158 							j, k, reg_value,
6159 							&sec_count, &ded_count);
6160 			}
6161 		}
6162 	}
6163 
6164 	err_data->ce_count += sec_count;
6165 	err_data->ue_count += ded_count;
6166 
6167 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6168 	mutex_unlock(&adev->grbm_idx_mutex);
6169 
6170 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6171 
6172 	return 0;
6173 }
6174 
6175 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6176 	.name = "gfx_v9_0",
6177 	.early_init = gfx_v9_0_early_init,
6178 	.late_init = gfx_v9_0_late_init,
6179 	.sw_init = gfx_v9_0_sw_init,
6180 	.sw_fini = gfx_v9_0_sw_fini,
6181 	.hw_init = gfx_v9_0_hw_init,
6182 	.hw_fini = gfx_v9_0_hw_fini,
6183 	.suspend = gfx_v9_0_suspend,
6184 	.resume = gfx_v9_0_resume,
6185 	.is_idle = gfx_v9_0_is_idle,
6186 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6187 	.soft_reset = gfx_v9_0_soft_reset,
6188 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6189 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6190 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6191 };
6192 
6193 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6194 	.type = AMDGPU_RING_TYPE_GFX,
6195 	.align_mask = 0xff,
6196 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6197 	.support_64bit_ptrs = true,
6198 	.vmhub = AMDGPU_GFXHUB_0,
6199 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6200 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6201 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6202 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6203 		5 +  /* COND_EXEC */
6204 		7 +  /* PIPELINE_SYNC */
6205 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6206 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6207 		2 + /* VM_FLUSH */
6208 		8 +  /* FENCE for VM_FLUSH */
6209 		20 + /* GDS switch */
6210 		4 + /* double SWITCH_BUFFER,
6211 		       the first COND_EXEC jump to the place just
6212 			   prior to this double SWITCH_BUFFER  */
6213 		5 + /* COND_EXEC */
6214 		7 +	 /*	HDP_flush */
6215 		4 +	 /*	VGT_flush */
6216 		14 + /*	CE_META */
6217 		31 + /*	DE_META */
6218 		3 + /* CNTX_CTRL */
6219 		5 + /* HDP_INVL */
6220 		8 + 8 + /* FENCE x2 */
6221 		2, /* SWITCH_BUFFER */
6222 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6223 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6224 	.emit_fence = gfx_v9_0_ring_emit_fence,
6225 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6226 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6227 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6228 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6229 	.test_ring = gfx_v9_0_ring_test_ring,
6230 	.test_ib = gfx_v9_0_ring_test_ib,
6231 	.insert_nop = amdgpu_ring_insert_nop,
6232 	.pad_ib = amdgpu_ring_generic_pad_ib,
6233 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6234 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6235 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6236 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6237 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6238 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6239 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6240 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6241 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6242 };
6243 
6244 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6245 	.type = AMDGPU_RING_TYPE_COMPUTE,
6246 	.align_mask = 0xff,
6247 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6248 	.support_64bit_ptrs = true,
6249 	.vmhub = AMDGPU_GFXHUB_0,
6250 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6251 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6252 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6253 	.emit_frame_size =
6254 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6255 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6256 		5 + /* hdp invalidate */
6257 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6258 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6259 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6260 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6261 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6262 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6263 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6264 	.emit_fence = gfx_v9_0_ring_emit_fence,
6265 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6266 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6267 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6268 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6269 	.test_ring = gfx_v9_0_ring_test_ring,
6270 	.test_ib = gfx_v9_0_ring_test_ib,
6271 	.insert_nop = amdgpu_ring_insert_nop,
6272 	.pad_ib = amdgpu_ring_generic_pad_ib,
6273 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6274 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6275 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6276 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6277 };
6278 
6279 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6280 	.type = AMDGPU_RING_TYPE_KIQ,
6281 	.align_mask = 0xff,
6282 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6283 	.support_64bit_ptrs = true,
6284 	.vmhub = AMDGPU_GFXHUB_0,
6285 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6286 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6287 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6288 	.emit_frame_size =
6289 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6290 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6291 		5 + /* hdp invalidate */
6292 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6293 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6294 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6295 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6296 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6297 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6298 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6299 	.test_ring = gfx_v9_0_ring_test_ring,
6300 	.insert_nop = amdgpu_ring_insert_nop,
6301 	.pad_ib = amdgpu_ring_generic_pad_ib,
6302 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6303 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6304 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6305 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6306 };
6307 
6308 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6309 {
6310 	int i;
6311 
6312 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6313 
6314 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6315 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6316 
6317 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6318 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6319 }
6320 
6321 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6322 	.set = gfx_v9_0_set_eop_interrupt_state,
6323 	.process = gfx_v9_0_eop_irq,
6324 };
6325 
6326 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6327 	.set = gfx_v9_0_set_priv_reg_fault_state,
6328 	.process = gfx_v9_0_priv_reg_irq,
6329 };
6330 
6331 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6332 	.set = gfx_v9_0_set_priv_inst_fault_state,
6333 	.process = gfx_v9_0_priv_inst_irq,
6334 };
6335 
6336 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6337 	.set = gfx_v9_0_set_cp_ecc_error_state,
6338 	.process = amdgpu_gfx_cp_ecc_error_irq,
6339 };
6340 
6341 
6342 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6343 {
6344 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6345 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6346 
6347 	adev->gfx.priv_reg_irq.num_types = 1;
6348 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6349 
6350 	adev->gfx.priv_inst_irq.num_types = 1;
6351 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6352 
6353 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6354 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6355 }
6356 
6357 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6358 {
6359 	switch (adev->asic_type) {
6360 	case CHIP_VEGA10:
6361 	case CHIP_VEGA12:
6362 	case CHIP_VEGA20:
6363 	case CHIP_RAVEN:
6364 	case CHIP_ARCTURUS:
6365 	case CHIP_RENOIR:
6366 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6367 		break;
6368 	default:
6369 		break;
6370 	}
6371 }
6372 
6373 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6374 {
6375 	/* init asci gds info */
6376 	switch (adev->asic_type) {
6377 	case CHIP_VEGA10:
6378 	case CHIP_VEGA12:
6379 	case CHIP_VEGA20:
6380 		adev->gds.gds_size = 0x10000;
6381 		break;
6382 	case CHIP_RAVEN:
6383 	case CHIP_ARCTURUS:
6384 		adev->gds.gds_size = 0x1000;
6385 		break;
6386 	default:
6387 		adev->gds.gds_size = 0x10000;
6388 		break;
6389 	}
6390 
6391 	switch (adev->asic_type) {
6392 	case CHIP_VEGA10:
6393 	case CHIP_VEGA20:
6394 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6395 		break;
6396 	case CHIP_VEGA12:
6397 		adev->gds.gds_compute_max_wave_id = 0x27f;
6398 		break;
6399 	case CHIP_RAVEN:
6400 		if (adev->rev_id >= 0x8)
6401 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6402 		else
6403 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6404 		break;
6405 	case CHIP_ARCTURUS:
6406 		adev->gds.gds_compute_max_wave_id = 0xfff;
6407 		break;
6408 	default:
6409 		/* this really depends on the chip */
6410 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6411 		break;
6412 	}
6413 
6414 	adev->gds.gws_size = 64;
6415 	adev->gds.oa_size = 16;
6416 }
6417 
6418 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6419 						 u32 bitmap)
6420 {
6421 	u32 data;
6422 
6423 	if (!bitmap)
6424 		return;
6425 
6426 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6427 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6428 
6429 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6430 }
6431 
6432 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6433 {
6434 	u32 data, mask;
6435 
6436 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6437 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6438 
6439 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6440 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6441 
6442 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6443 
6444 	return (~data) & mask;
6445 }
6446 
6447 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6448 				 struct amdgpu_cu_info *cu_info)
6449 {
6450 	int i, j, k, counter, active_cu_number = 0;
6451 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6452 	unsigned disable_masks[4 * 4];
6453 
6454 	if (!adev || !cu_info)
6455 		return -EINVAL;
6456 
6457 	/*
6458 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6459 	 */
6460 	if (adev->gfx.config.max_shader_engines *
6461 		adev->gfx.config.max_sh_per_se > 16)
6462 		return -EINVAL;
6463 
6464 	amdgpu_gfx_parse_disable_cu(disable_masks,
6465 				    adev->gfx.config.max_shader_engines,
6466 				    adev->gfx.config.max_sh_per_se);
6467 
6468 	mutex_lock(&adev->grbm_idx_mutex);
6469 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6470 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6471 			mask = 1;
6472 			ao_bitmap = 0;
6473 			counter = 0;
6474 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6475 			gfx_v9_0_set_user_cu_inactive_bitmap(
6476 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6477 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6478 
6479 			/*
6480 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6481 			 * 4x4 size array, and it's usually suitable for Vega
6482 			 * ASICs which has 4*2 SE/SH layout.
6483 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6484 			 * To mostly reduce the impact, we make it compatible
6485 			 * with current bitmap array as below:
6486 			 *    SE4,SH0 --> bitmap[0][1]
6487 			 *    SE5,SH0 --> bitmap[1][1]
6488 			 *    SE6,SH0 --> bitmap[2][1]
6489 			 *    SE7,SH0 --> bitmap[3][1]
6490 			 */
6491 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6492 
6493 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6494 				if (bitmap & mask) {
6495 					if (counter < adev->gfx.config.max_cu_per_sh)
6496 						ao_bitmap |= mask;
6497 					counter ++;
6498 				}
6499 				mask <<= 1;
6500 			}
6501 			active_cu_number += counter;
6502 			if (i < 2 && j < 2)
6503 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6504 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6505 		}
6506 	}
6507 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6508 	mutex_unlock(&adev->grbm_idx_mutex);
6509 
6510 	cu_info->number = active_cu_number;
6511 	cu_info->ao_cu_mask = ao_cu_mask;
6512 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6513 
6514 	return 0;
6515 }
6516 
6517 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6518 {
6519 	.type = AMD_IP_BLOCK_TYPE_GFX,
6520 	.major = 9,
6521 	.minor = 0,
6522 	.rev = 0,
6523 	.funcs = &gfx_v9_0_ip_funcs,
6524 };
6525