xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision d9a07577b8a3131c90c187fb2b89662bee535cfd)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
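/*
 * Location of the SEC (single-error corrected) and DED (double-error
 * detected) counters for one GFX RAS sub-block: a SOC15 register
 * (hwip/inst/seg/reg_offset) plus the mask/shift pairs that extract the
 * two counts from that register.
 */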
134 struct ras_gfx_subblock_reg {
135 	const char *name;
136 	uint32_t hwip;
137 	uint32_t inst;
138 	uint32_t seg;
139 	uint32_t reg_offset;
140 	uint32_t sec_count_mask;
141 	uint32_t sec_count_shift;
142 	uint32_t ded_count_mask;
143 	uint32_t ded_count_shift;
144 };
145 
146 enum ta_ras_gfx_subblock {
147 	/*CPC*/
148 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
149 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
150 	TA_RAS_BLOCK__GFX_CPC_UCODE,
151 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
152 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
153 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
155 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
156 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
157 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
158 	/* CPF*/
159 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
160 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
161 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
162 	TA_RAS_BLOCK__GFX_CPF_TAG,
163 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
164 	/* CPG*/
165 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
166 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
167 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
168 	TA_RAS_BLOCK__GFX_CPG_TAG,
169 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
170 	/* GDS*/
171 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
172 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
173 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
174 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
175 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
178 	/* SPI*/
179 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
180 	/* SQ*/
181 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
182 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
183 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
184 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
185 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
186 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
187 	/* SQC (3 ranges)*/
188 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
189 	/* SQC range 0*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
193 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
194 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
200 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	/* SQC range 1*/
202 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
203 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
204 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
206 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
208 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
209 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
214 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	/* SQC range 2*/
216 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
217 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
218 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
220 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
222 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
223 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
227 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
228 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
230 	/* TA*/
231 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
232 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
233 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
234 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
235 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
237 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
238 	/* TCA*/
239 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
240 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
241 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
242 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
243 	/* TCC (5 sub-ranges)*/
244 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
245 	/* TCC range 0*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
248 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
253 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
254 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
255 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
256 	/* TCC range 1*/
257 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
258 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
259 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
261 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	/* TCC range 2*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
264 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
265 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
266 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
267 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
268 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
269 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
270 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
271 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
273 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	/* TCC range 3*/
275 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
276 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
277 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
278 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
279 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	/* TCC range 4*/
281 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
282 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
283 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
285 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
286 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
288 	/* TCI*/
289 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
290 	/* TCP*/
291 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
292 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
293 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
294 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
295 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
296 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
297 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
298 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
299 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
300 	/* TD*/
301 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
302 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
303 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
304 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
305 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
306 	/* EA (3 sub-ranges)*/
307 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
308 	/* EA range 0*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
311 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
313 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
314 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
316 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
318 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
319 	/* EA range 1*/
320 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
321 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
322 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
323 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
324 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
325 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
326 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
327 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
328 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
329 	/* EA range 2*/
330 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
331 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
332 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
335 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
336 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
337 	/* UTC VM L2 bank*/
338 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
339 	/* UTC VM walker*/
340 	TA_RAS_BLOCK__UTC_VML2_WALKER,
341 	/* UTC ATC L2 2MB cache*/
342 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
343 	/* UTC ATC L2 4KB cache*/
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
345 	TA_RAS_BLOCK__GFX_MAX
346 };
347 
348 struct ras_gfx_subblock {
349 	unsigned char *name;
350 	int ta_subblock;
351 	int hw_supported_error_type;
352 	int sw_supported_error_type;
353 };
354 
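/*
 * Helper for building ras_gfx_subblocks[] below.  The a..h arguments are
 * 0/1 flags packed into the hw_supported_error_type and
 * sw_supported_error_type bitmasks; the bit positions appear to follow
 * enum amdgpu_ras_error_type (parity, single correctable,
 * multi uncorrectable, poison).
 */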
355 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
356 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
357 		#subblock,                                                     \
358 		TA_RAS_BLOCK__##subblock,                                      \
359 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
360 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
361 	}
362 
363 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
381 			     0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
392 			     0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
394 			     0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
396 			     0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
400 			     0, 0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
404 			     1),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
406 			     0, 0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
418 			     0, 0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
424 			     0, 0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
436 			     0, 0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
448 			     1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
454 			     0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
469 			     0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
472 			     0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
474 			     0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
476 			     0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
511 };
512 
513 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
514 {
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
535 };
536 
537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
538 {
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
557 };
558 
559 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
560 {
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
572 };
573 
574 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
575 {
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
600 };
601 
602 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
603 {
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
611 };
612 
613 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
614 {
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
634 };
635 
636 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
637 {
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
650 };
651 
652 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
653 {
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
657 };
658 
659 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
660 {
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
677 };
678 
679 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
680 {
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
694 };
695 
696 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
697 {
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
707 };
708 
709 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
710 {
711 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
712 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
713 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
714 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
715 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
716 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
717 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
718 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
719 };
720 
721 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
722 {
723 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
724 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
725 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
726 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
727 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
728 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
729 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
730 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
731 };
732 
733 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
734 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
735 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
736 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
737 
738 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
739 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
740 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
741 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
742 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
743                                  struct amdgpu_cu_info *cu_info);
744 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
745 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
746 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
747 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
748 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
749 					  void *ras_error_status);
750 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
751 				     void *inject_if);
752 
753 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
754 {
755 	switch (adev->asic_type) {
756 	case CHIP_VEGA10:
757 		soc15_program_register_sequence(adev,
758 						golden_settings_gc_9_0,
759 						ARRAY_SIZE(golden_settings_gc_9_0));
760 		soc15_program_register_sequence(adev,
761 						golden_settings_gc_9_0_vg10,
762 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
763 		break;
764 	case CHIP_VEGA12:
765 		soc15_program_register_sequence(adev,
766 						golden_settings_gc_9_2_1,
767 						ARRAY_SIZE(golden_settings_gc_9_2_1));
768 		soc15_program_register_sequence(adev,
769 						golden_settings_gc_9_2_1_vg12,
770 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
771 		break;
772 	case CHIP_VEGA20:
773 		soc15_program_register_sequence(adev,
774 						golden_settings_gc_9_0,
775 						ARRAY_SIZE(golden_settings_gc_9_0));
776 		soc15_program_register_sequence(adev,
777 						golden_settings_gc_9_0_vg20,
778 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
779 		break;
780 	case CHIP_ARCTURUS:
781 		soc15_program_register_sequence(adev,
782 						golden_settings_gc_9_4_1_arct,
783 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
784 		break;
785 	case CHIP_RAVEN:
786 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
787 						ARRAY_SIZE(golden_settings_gc_9_1));
788 		if (adev->rev_id >= 8)
789 			soc15_program_register_sequence(adev,
790 							golden_settings_gc_9_1_rv2,
791 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
792 		else
793 			soc15_program_register_sequence(adev,
794 							golden_settings_gc_9_1_rv1,
795 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
796 		break;
797 	case CHIP_RENOIR:
798 		soc15_program_register_sequence(adev,
799 						golden_settings_gc_9_1_rn,
800 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
801 		return; /* Renoir does not need the common golden settings */
802 	default:
803 		break;
804 	}
805 
806 	if (adev->asic_type != CHIP_ARCTURUS)
807 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
808 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
809 }
810 
811 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
812 {
813 	adev->gfx.scratch.num_reg = 8;
814 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
815 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
816 }
817 
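/*
 * Emit a PM4 WRITE_DATA packet that writes @val to the register at offset
 * @reg on the selected engine, optionally with write confirmation (@wc).
 */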
818 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
819 				       bool wc, uint32_t reg, uint32_t val)
820 {
821 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
822 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
823 				WRITE_DATA_DST_SEL(0) |
824 				(wc ? WR_CONFIRM : 0));
825 	amdgpu_ring_write(ring, reg);
826 	amdgpu_ring_write(ring, 0);
827 	amdgpu_ring_write(ring, val);
828 }
829 
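/*
 * Emit a PM4 WAIT_REG_MEM packet that polls a register (mem_space == 0) or
 * a memory location (mem_space == 1) until (value & @mask) == @ref, using
 * @inv as the poll interval.
 */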
830 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
831 				  int mem_space, int opt, uint32_t addr0,
832 				  uint32_t addr1, uint32_t ref, uint32_t mask,
833 				  uint32_t inv)
834 {
835 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
836 	amdgpu_ring_write(ring,
837 				 /* memory (1) or register (0) */
838 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
839 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
840 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
841 				 WAIT_REG_MEM_ENGINE(eng_sel)));
842 
843 	if (mem_space)
844 		BUG_ON(addr0 & 0x3); /* Dword align */
845 	amdgpu_ring_write(ring, addr0);
846 	amdgpu_ring_write(ring, addr1);
847 	amdgpu_ring_write(ring, ref);
848 	amdgpu_ring_write(ring, mask);
849 	amdgpu_ring_write(ring, inv); /* poll interval */
850 }
851 
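/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until the
 * new value is visible or the timeout expires.
 */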
852 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
853 {
854 	struct amdgpu_device *adev = ring->adev;
855 	uint32_t scratch;
856 	uint32_t tmp = 0;
857 	unsigned i;
858 	int r;
859 
860 	r = amdgpu_gfx_scratch_get(adev, &scratch);
861 	if (r)
862 		return r;
863 
864 	WREG32(scratch, 0xCAFEDEAD);
865 	r = amdgpu_ring_alloc(ring, 3);
866 	if (r)
867 		goto error_free_scratch;
868 
869 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
870 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
871 	amdgpu_ring_write(ring, 0xDEADBEEF);
872 	amdgpu_ring_commit(ring);
873 
874 	for (i = 0; i < adev->usec_timeout; i++) {
875 		tmp = RREG32(scratch);
876 		if (tmp == 0xDEADBEEF)
877 			break;
878 		udelay(1);
879 	}
880 
881 	if (i >= adev->usec_timeout)
882 		r = -ETIMEDOUT;
883 
884 error_free_scratch:
885 	amdgpu_gfx_scratch_free(adev, scratch);
886 	return r;
887 }
888 
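/*
 * Indirect buffer test: submit an IB whose WRITE_DATA packet stores
 * 0xDEADBEEF to a writeback slot, wait for the fence, then check that the
 * value actually landed in memory.
 */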
889 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
890 {
891 	struct amdgpu_device *adev = ring->adev;
892 	struct amdgpu_ib ib;
893 	struct dma_fence *f = NULL;
894 
895 	unsigned index;
896 	uint64_t gpu_addr;
897 	uint32_t tmp;
898 	long r;
899 
900 	r = amdgpu_device_wb_get(adev, &index);
901 	if (r)
902 		return r;
903 
904 	gpu_addr = adev->wb.gpu_addr + (index * 4);
905 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
906 	memset(&ib, 0, sizeof(ib));
907 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
908 	if (r)
909 		goto err1;
910 
911 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
912 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
913 	ib.ptr[2] = lower_32_bits(gpu_addr);
914 	ib.ptr[3] = upper_32_bits(gpu_addr);
915 	ib.ptr[4] = 0xDEADBEEF;
916 	ib.length_dw = 5;
917 
918 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
919 	if (r)
920 		goto err2;
921 
922 	r = dma_fence_wait_timeout(f, false, timeout);
923 	if (r == 0) {
924 		r = -ETIMEDOUT;
925 		goto err2;
926 	} else if (r < 0) {
927 		goto err2;
928 	}
929 
930 	tmp = adev->wb.wb[index];
931 	if (tmp == 0xDEADBEEF)
932 		r = 0;
933 	else
934 		r = -EINVAL;
935 
936 err2:
937 	amdgpu_ib_free(adev, &ib, NULL);
938 	dma_fence_put(f);
939 err1:
940 	amdgpu_device_wb_free(adev, index);
941 	return r;
942 }
943 
944 
945 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
946 {
947 	release_firmware(adev->gfx.pfp_fw);
948 	adev->gfx.pfp_fw = NULL;
949 	release_firmware(adev->gfx.me_fw);
950 	adev->gfx.me_fw = NULL;
951 	release_firmware(adev->gfx.ce_fw);
952 	adev->gfx.ce_fw = NULL;
953 	release_firmware(adev->gfx.rlc_fw);
954 	adev->gfx.rlc_fw = NULL;
955 	release_firmware(adev->gfx.mec_fw);
956 	adev->gfx.mec_fw = NULL;
957 	release_firmware(adev->gfx.mec2_fw);
958 	adev->gfx.mec2_fw = NULL;
959 
960 	kfree(adev->gfx.rlc.register_list_format);
961 }
962 
963 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
964 {
965 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
966 
967 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
968 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
969 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
970 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
971 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
972 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
973 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
974 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
975 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
976 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
977 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
978 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
979 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
980 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
981 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
982 }
983 
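/*
 * Set the *_fw_write_wait flags based on the loaded ME/MEC/PFP firmware
 * versions; these indicate whether the CP firmware is new enough to honor
 * register write-then-wait sequences.
 */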
984 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
985 {
986 	adev->gfx.me_fw_write_wait = false;
987 	adev->gfx.mec_fw_write_wait = false;
988 
989 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
990 	    (adev->gfx.mec_feature_version < 46) ||
991 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
992 	    (adev->gfx.pfp_feature_version < 46))
993 		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
994 			      "GRBM requires 1-cycle delay in cp firmware\n");
995 
996 	switch (adev->asic_type) {
997 	case CHIP_VEGA10:
998 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
999 		    (adev->gfx.me_feature_version >= 42) &&
1000 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1001 		    (adev->gfx.pfp_feature_version >= 42))
1002 			adev->gfx.me_fw_write_wait = true;
1003 
1004 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1005 		    (adev->gfx.mec_feature_version >= 42))
1006 			adev->gfx.mec_fw_write_wait = true;
1007 		break;
1008 	case CHIP_VEGA12:
1009 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1010 		    (adev->gfx.me_feature_version >= 44) &&
1011 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1012 		    (adev->gfx.pfp_feature_version >= 44))
1013 			adev->gfx.me_fw_write_wait = true;
1014 
1015 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1016 		    (adev->gfx.mec_feature_version >= 44))
1017 			adev->gfx.mec_fw_write_wait = true;
1018 		break;
1019 	case CHIP_VEGA20:
1020 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1021 		    (adev->gfx.me_feature_version >= 44) &&
1022 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1023 		    (adev->gfx.pfp_feature_version >= 44))
1024 			adev->gfx.me_fw_write_wait = true;
1025 
1026 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1027 		    (adev->gfx.mec_feature_version >= 44))
1028 			adev->gfx.mec_fw_write_wait = true;
1029 		break;
1030 	case CHIP_RAVEN:
1031 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1032 		    (adev->gfx.me_feature_version >= 42) &&
1033 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1034 		    (adev->gfx.pfp_feature_version >= 42))
1035 			adev->gfx.me_fw_write_wait = true;
1036 
1037 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1038 		    (adev->gfx.mec_feature_version >= 42))
1039 			adev->gfx.mec_fw_write_wait = true;
1040 		break;
1041 	default:
1042 		break;
1043 	}
1044 }
1045 
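/*
 * Disable GFXOFF on original Raven parts whose RLC firmware is too old to
 * support it; where GFXOFF remains enabled, also set the matching GFX
 * powergating flags.
 */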
1046 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1047 {
1048 	switch (adev->asic_type) {
1049 	case CHIP_VEGA10:
1050 	case CHIP_VEGA12:
1051 	case CHIP_VEGA20:
1052 		break;
1053 	case CHIP_RAVEN:
1054 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1055 			&& ((adev->gfx.rlc_fw_version != 106 &&
1056 			     adev->gfx.rlc_fw_version < 531) ||
1057 			    (adev->gfx.rlc_fw_version == 53815) ||
1058 			    (adev->gfx.rlc_feature_version < 1) ||
1059 			    !adev->gfx.rlc.is_rlc_v2_1))
1060 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1061 
1062 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1063 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1064 				AMD_PG_SUPPORT_CP |
1065 				AMD_PG_SUPPORT_RLC_SMU_HS;
1066 		break;
1067 	case CHIP_RENOIR:
1068 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1069 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1070 				AMD_PG_SUPPORT_CP |
1071 				AMD_PG_SUPPORT_RLC_SMU_HS;
1072 		break;
1073 	default:
1074 		break;
1075 	}
1076 }
1077 
1078 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1079 					  const char *chip_name)
1080 {
1081 	char fw_name[30];
1082 	int err;
1083 	struct amdgpu_firmware_info *info = NULL;
1084 	const struct common_firmware_header *header = NULL;
1085 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1086 
1087 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1088 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1089 	if (err)
1090 		goto out;
1091 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1092 	if (err)
1093 		goto out;
1094 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1095 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1096 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1097 
1098 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1099 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1100 	if (err)
1101 		goto out;
1102 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1103 	if (err)
1104 		goto out;
1105 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1106 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1107 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1108 
1109 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1110 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1111 	if (err)
1112 		goto out;
1113 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1114 	if (err)
1115 		goto out;
1116 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1117 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1118 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1119 
1120 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1121 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1122 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1123 		info->fw = adev->gfx.pfp_fw;
1124 		header = (const struct common_firmware_header *)info->fw->data;
1125 		adev->firmware.fw_size +=
1126 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1127 
1128 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1129 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1130 		info->fw = adev->gfx.me_fw;
1131 		header = (const struct common_firmware_header *)info->fw->data;
1132 		adev->firmware.fw_size +=
1133 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1134 
1135 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1136 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1137 		info->fw = adev->gfx.ce_fw;
1138 		header = (const struct common_firmware_header *)info->fw->data;
1139 		adev->firmware.fw_size +=
1140 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1141 	}
1142 
1143 out:
1144 	if (err) {
1145 		dev_err(adev->dev,
1146 			"gfx9: Failed to load firmware \"%s\"\n",
1147 			fw_name);
1148 		release_firmware(adev->gfx.pfp_fw);
1149 		adev->gfx.pfp_fw = NULL;
1150 		release_firmware(adev->gfx.me_fw);
1151 		adev->gfx.me_fw = NULL;
1152 		release_firmware(adev->gfx.ce_fw);
1153 		adev->gfx.ce_fw = NULL;
1154 	}
1155 	return err;
1156 }
1157 
1158 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1159 					  const char *chip_name)
1160 {
1161 	char fw_name[30];
1162 	int err;
1163 	struct amdgpu_firmware_info *info = NULL;
1164 	const struct common_firmware_header *header = NULL;
1165 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1166 	unsigned int *tmp = NULL;
1167 	unsigned int i = 0;
1168 	uint16_t version_major;
1169 	uint16_t version_minor;
1170 	uint32_t smu_version;
1171 
1172 	/*
1173 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1174 	 * instead of picasso_rlc.bin.
1175 	 * Detection method:
1176 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1177 	 *          or revision >= 0xD8 && revision <= 0xDF
1178 	 * otherwise it is PCO FP5
1179 	 */
1180 	if (!strcmp(chip_name, "picasso") &&
1181 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1182 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1183 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1184 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1185 		(smu_version >= 0x41e2b))
1186 		/*
1187 		 * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1188 		 */
1189 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1190 	else
1191 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1192 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1193 	if (err)
1194 		goto out;
1195 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1196 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1197 
1198 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1199 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1200 	if (version_major == 2 && version_minor == 1)
1201 		adev->gfx.rlc.is_rlc_v2_1 = true;
1202 
1203 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1204 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1205 	adev->gfx.rlc.save_and_restore_offset =
1206 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1207 	adev->gfx.rlc.clear_state_descriptor_offset =
1208 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1209 	adev->gfx.rlc.avail_scratch_ram_locations =
1210 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1211 	adev->gfx.rlc.reg_restore_list_size =
1212 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1213 	adev->gfx.rlc.reg_list_format_start =
1214 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1215 	adev->gfx.rlc.reg_list_format_separate_start =
1216 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1217 	adev->gfx.rlc.starting_offsets_start =
1218 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1219 	adev->gfx.rlc.reg_list_format_size_bytes =
1220 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1221 	adev->gfx.rlc.reg_list_size_bytes =
1222 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1223 	adev->gfx.rlc.register_list_format =
1224 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1225 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1226 	if (!adev->gfx.rlc.register_list_format) {
1227 		err = -ENOMEM;
1228 		goto out;
1229 	}
1230 
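	/*
	 * The single allocation above holds the register-list format array
	 * followed immediately by the register-restore array: both are copied
	 * out of the RLC firmware image below, and register_restore is pointed
	 * just past the last format entry.
	 */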
1231 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1232 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1233 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1234 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1235 
1236 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1237 
1238 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1239 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1240 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1241 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1242 
1243 	if (adev->gfx.rlc.is_rlc_v2_1)
1244 		gfx_v9_0_init_rlc_ext_microcode(adev);
1245 
1246 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1247 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1248 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1249 		info->fw = adev->gfx.rlc_fw;
1250 		header = (const struct common_firmware_header *)info->fw->data;
1251 		adev->firmware.fw_size +=
1252 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1253 
1254 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1255 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1256 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1257 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1258 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1259 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1260 			info->fw = adev->gfx.rlc_fw;
1261 			adev->firmware.fw_size +=
1262 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1263 
1264 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1265 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1266 			info->fw = adev->gfx.rlc_fw;
1267 			adev->firmware.fw_size +=
1268 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1269 
1270 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1271 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1272 			info->fw = adev->gfx.rlc_fw;
1273 			adev->firmware.fw_size +=
1274 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1275 		}
1276 	}
1277 
1278 out:
1279 	if (err) {
1280 		dev_err(adev->dev,
1281 			"gfx9: Failed to load firmware \"%s\"\n",
1282 			fw_name);
1283 		release_firmware(adev->gfx.rlc_fw);
1284 		adev->gfx.rlc_fw = NULL;
1285 	}
1286 	return err;
1287 }
1288 
1289 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1290 					  const char *chip_name)
1291 {
1292 	char fw_name[30];
1293 	int err;
1294 	struct amdgpu_firmware_info *info = NULL;
1295 	const struct common_firmware_header *header = NULL;
1296 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1297 
1298 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1299 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1300 	if (err)
1301 		goto out;
1302 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1303 	if (err)
1304 		goto out;
1305 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1306 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1307 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1308 
1309 
1310 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1311 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1312 	if (!err) {
1313 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1314 		if (err)
1315 			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
1322 	} else {
1323 		err = 0;
1324 		adev->gfx.mec2_fw = NULL;
1325 	}
1326 
1327 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1328 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1329 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1330 		info->fw = adev->gfx.mec_fw;
1331 		header = (const struct common_firmware_header *)info->fw->data;
1332 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1333 		adev->firmware.fw_size +=
1334 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1335 
1336 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1337 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1338 		info->fw = adev->gfx.mec_fw;
1339 		adev->firmware.fw_size +=
1340 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1341 
1342 		if (adev->gfx.mec2_fw) {
1343 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1344 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1345 			info->fw = adev->gfx.mec2_fw;
1346 			header = (const struct common_firmware_header *)info->fw->data;
1347 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1348 			adev->firmware.fw_size +=
1349 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1350 
			/* TODO: Determine if MEC2 JT FW loading can be removed
			 * for all GFX v9 and newer ASICs
			 */
1353 			if (adev->asic_type != CHIP_ARCTURUS &&
1354 			    adev->asic_type != CHIP_RENOIR) {
1355 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1356 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1357 				info->fw = adev->gfx.mec2_fw;
1358 				adev->firmware.fw_size +=
1359 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1360 					PAGE_SIZE);
1361 			}
1362 		}
1363 	}
1364 
1365 out:
1366 	gfx_v9_0_check_if_need_gfxoff(adev);
1367 	gfx_v9_0_check_fw_write_wait(adev);
1368 	if (err) {
1369 		dev_err(adev->dev,
1370 			"gfx9: Failed to load firmware \"%s\"\n",
1371 			fw_name);
1372 		release_firmware(adev->gfx.mec_fw);
1373 		adev->gfx.mec_fw = NULL;
1374 		release_firmware(adev->gfx.mec2_fw);
1375 		adev->gfx.mec2_fw = NULL;
1376 	}
1377 	return err;
1378 }
1379 
1380 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1381 {
1382 	const char *chip_name;
1383 	int r;
1384 
1385 	DRM_DEBUG("\n");
1386 
1387 	switch (adev->asic_type) {
1388 	case CHIP_VEGA10:
1389 		chip_name = "vega10";
1390 		break;
1391 	case CHIP_VEGA12:
1392 		chip_name = "vega12";
1393 		break;
1394 	case CHIP_VEGA20:
1395 		chip_name = "vega20";
1396 		break;
1397 	case CHIP_RAVEN:
1398 		if (adev->rev_id >= 8)
1399 			chip_name = "raven2";
1400 		else if (adev->pdev->device == 0x15d8)
1401 			chip_name = "picasso";
1402 		else
1403 			chip_name = "raven";
1404 		break;
1405 	case CHIP_ARCTURUS:
1406 		chip_name = "arcturus";
1407 		break;
1408 	case CHIP_RENOIR:
1409 		chip_name = "renoir";
1410 		break;
1411 	default:
1412 		BUG();
1413 	}
1414 
1415 	/* No CPG in Arcturus */
1416 	if (adev->asic_type != CHIP_ARCTURUS) {
1417 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1418 		if (r)
1419 			return r;
1420 	}
1421 
1422 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1423 	if (r)
1424 		return r;
1425 
1426 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1427 	if (r)
1428 		return r;
1429 
1430 	return r;
1431 }
1432 
1433 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1434 {
1435 	u32 count = 0;
1436 	const struct cs_section_def *sect = NULL;
1437 	const struct cs_extent_def *ext = NULL;
1438 
1439 	/* begin clear state */
1440 	count += 2;
1441 	/* context control state */
1442 	count += 3;
1443 
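	/* each SECT_CONTEXT extent is emitted as one SET_CONTEXT_REG packet:
	 * a 2-dword header (opcode + register offset) plus reg_count payload
	 * dwords, which is what 2 + ext->reg_count accounts for below
	 */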
1444 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1445 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1446 			if (sect->id == SECT_CONTEXT)
1447 				count += 2 + ext->reg_count;
1448 			else
1449 				return 0;
1450 		}
1451 	}
1452 
1453 	/* end clear state */
1454 	count += 2;
1455 	/* clear state */
1456 	count += 2;
1457 
1458 	return count;
1459 }
1460 
1461 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1462 				    volatile u32 *buffer)
1463 {
1464 	u32 count = 0, i;
1465 	const struct cs_section_def *sect = NULL;
1466 	const struct cs_extent_def *ext = NULL;
1467 
1468 	if (adev->gfx.rlc.cs_data == NULL)
1469 		return;
1470 	if (buffer == NULL)
1471 		return;
1472 
1473 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1474 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1475 
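	/* CONTEXT_CONTROL with two payload dwords; bit 31 (0x80000000) is
	 * assumed here to be the update/enable bit of the load_control and
	 * shadow_control dwords of the PM4 CONTEXT_CONTROL packet
	 */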
1476 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1477 	buffer[count++] = cpu_to_le32(0x80000000);
1478 	buffer[count++] = cpu_to_le32(0x80000000);
1479 
1480 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1481 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1482 			if (sect->id == SECT_CONTEXT) {
1483 				buffer[count++] =
1484 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1485 				buffer[count++] = cpu_to_le32(ext->reg_index -
1486 						PACKET3_SET_CONTEXT_REG_START);
1487 				for (i = 0; i < ext->reg_count; i++)
1488 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1489 			} else {
1490 				return;
1491 			}
1492 		}
1493 	}
1494 
1495 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1496 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1497 
1498 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1499 	buffer[count++] = cpu_to_le32(0);
1500 }
1501 
1502 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1503 {
1504 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1505 	uint32_t pg_always_on_cu_num = 2;
1506 	uint32_t always_on_cu_num;
1507 	uint32_t i, j, k;
1508 	uint32_t mask, cu_bitmap, counter;
1509 
1510 	if (adev->flags & AMD_IS_APU)
1511 		always_on_cu_num = 4;
1512 	else if (adev->asic_type == CHIP_VEGA12)
1513 		always_on_cu_num = 8;
1514 	else
1515 		always_on_cu_num = 12;
1516 
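	/*
	 * For every SE/SH instance, walk the active CU bitmap and mark the
	 * first always_on_cu_num CUs as always-on in
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK; once pg_always_on_cu_num CUs have been
	 * collected, that partial mask is also written to
	 * RLC_PG_ALWAYS_ON_CU_MASK.
	 */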
1517 	mutex_lock(&adev->grbm_idx_mutex);
1518 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1519 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1520 			mask = 1;
1521 			cu_bitmap = 0;
1522 			counter = 0;
1523 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1524 
1525 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1526 				if (cu_info->bitmap[i][j] & mask) {
1527 					if (counter == pg_always_on_cu_num)
1528 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1529 					if (counter < always_on_cu_num)
1530 						cu_bitmap |= mask;
1531 					else
1532 						break;
1533 					counter++;
1534 				}
1535 				mask <<= 1;
1536 			}
1537 
1538 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1539 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1540 		}
1541 	}
1542 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1543 	mutex_unlock(&adev->grbm_idx_mutex);
1544 }
1545 
1546 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1547 {
1548 	uint32_t data;
1549 
1550 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1551 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1552 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1553 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1554 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1555 
1556 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1557 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1558 
1559 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1560 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1561 
1562 	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1564 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1565 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1566 
1567 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1568 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1569 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1570 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1571 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1572 
1573 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1574 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1575 	data &= 0x0000FFFF;
1576 	data |= 0x00C00000;
1577 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1578 
1579 	/*
1580 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1581 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1582 	 */
1583 
	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but is used here as part of the RLC_LB_CNTL configuration */
1586 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1587 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1588 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1589 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1590 	mutex_unlock(&adev->grbm_idx_mutex);
1591 
1592 	gfx_v9_0_init_always_on_cu_mask(adev);
1593 }
1594 
1595 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1596 {
1597 	uint32_t data;
1598 
1599 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1600 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1601 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1602 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1603 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1604 
1605 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1606 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1607 
	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1609 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1610 
1611 	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1613 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1614 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1615 
1616 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1617 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1618 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1619 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1620 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1621 
1622 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1623 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1624 	data &= 0x0000FFFF;
1625 	data |= 0x00C00000;
1626 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1627 
1628 	/*
1629 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1630 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1631 	 */
1632 
	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but is used here as part of the RLC_LB_CNTL configuration */
1635 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1636 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1637 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1638 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1639 	mutex_unlock(&adev->grbm_idx_mutex);
1640 
1641 	gfx_v9_0_init_always_on_cu_mask(adev);
1642 }
1643 
1644 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1645 {
1646 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1647 }
1648 
1649 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1650 {
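	/* presumably one CP jump table each for CE, PFP, ME, MEC1 and MEC2 */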
1651 	return 5;
1652 }
1653 
1654 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1655 {
1656 	const struct cs_section_def *cs_data;
1657 	int r;
1658 
1659 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1660 
1661 	cs_data = adev->gfx.rlc.cs_data;
1662 
1663 	if (cs_data) {
1664 		/* init clear state block */
1665 		r = amdgpu_gfx_rlc_init_csb(adev);
1666 		if (r)
1667 			return r;
1668 	}
1669 
1670 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1671 		/* TODO: double check the cp_table_size for RV */
1672 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
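		/* sizing assumption: 5 jump tables of 96 dwords (4 bytes each)
		 * rounded up to 2KB, plus 64KB reserved for the GDS backup
		 */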
1673 		r = amdgpu_gfx_rlc_init_cpt(adev);
1674 		if (r)
1675 			return r;
1676 	}
1677 
1678 	switch (adev->asic_type) {
1679 	case CHIP_RAVEN:
1680 		gfx_v9_0_init_lbpw(adev);
1681 		break;
1682 	case CHIP_VEGA20:
1683 		gfx_v9_4_init_lbpw(adev);
1684 		break;
1685 	default:
1686 		break;
1687 	}
1688 
1689 	return 0;
1690 }
1691 
1692 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1693 {
1694 	int r;
1695 
1696 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1697 	if (unlikely(r != 0))
1698 		return r;
1699 
1700 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1701 			AMDGPU_GEM_DOMAIN_VRAM);
1702 	if (!r)
1703 		adev->gfx.rlc.clear_state_gpu_addr =
1704 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1705 
1706 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1707 
1708 	return r;
1709 }
1710 
1711 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1712 {
1713 	int r;
1714 
1715 	if (!adev->gfx.rlc.clear_state_obj)
1716 		return;
1717 
1718 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1719 	if (likely(r == 0)) {
1720 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1721 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1722 	}
1723 }
1724 
1725 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1726 {
1727 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1728 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1729 }
1730 
1731 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1732 {
1733 	int r;
1734 	u32 *hpd;
1735 	const __le32 *fw_data;
1736 	unsigned fw_size;
1737 	u32 *fw;
1738 	size_t mec_hpd_size;
1739 
1740 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1741 
1742 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1743 
1744 	/* take ownership of the relevant compute queues */
1745 	amdgpu_gfx_compute_queue_acquire(adev);
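	/* one GFX9_MEC_HPD_SIZE (4KB) EOP slot per enabled compute ring */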
1746 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1747 
1748 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1749 				      AMDGPU_GEM_DOMAIN_VRAM,
1750 				      &adev->gfx.mec.hpd_eop_obj,
1751 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1752 				      (void **)&hpd);
1753 	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1755 		gfx_v9_0_mec_fini(adev);
1756 		return r;
1757 	}
1758 
1759 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1760 
1761 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1762 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1763 
1764 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1765 
1766 	fw_data = (const __le32 *)
1767 		(adev->gfx.mec_fw->data +
1768 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1770 
1771 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1772 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1773 				      &adev->gfx.mec.mec_fw_obj,
1774 				      &adev->gfx.mec.mec_fw_gpu_addr,
1775 				      (void **)&fw);
1776 	if (r) {
1777 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1778 		gfx_v9_0_mec_fini(adev);
1779 		return r;
1780 	}
1781 
1782 	memcpy(fw, fw_data, fw_size);
1783 
1784 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1785 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1786 
1787 	return 0;
1788 }
1789 
1790 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1791 {
1792 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1793 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1794 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1795 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1796 		(SQ_IND_INDEX__FORCE_READ_MASK));
1797 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1798 }
1799 
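/*
 * Read `num` consecutive wave registers starting at `regno`: SQ_IND_INDEX is
 * programmed once with AUTO_INCR set, so each read of SQ_IND_DATA returns the
 * next register in the range.
 */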
1800 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1801 			   uint32_t wave, uint32_t thread,
1802 			   uint32_t regno, uint32_t num, uint32_t *out)
1803 {
1804 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1805 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1806 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1807 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1808 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1809 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1810 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1811 	while (num--)
1812 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1813 }
1814 
1815 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1816 {
1817 	/* type 1 wave data */
1818 	dst[(*no_fields)++] = 1;
1819 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1820 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1821 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1822 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1823 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1824 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1825 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1826 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1827 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1828 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1829 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1830 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1831 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1832 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1833 }
1834 
1835 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1836 				     uint32_t wave, uint32_t start,
1837 				     uint32_t size, uint32_t *dst)
1838 {
1839 	wave_read_regs(
1840 		adev, simd, wave, 0,
1841 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1842 }
1843 
1844 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1845 				     uint32_t wave, uint32_t thread,
1846 				     uint32_t start, uint32_t size,
1847 				     uint32_t *dst)
1848 {
1849 	wave_read_regs(
1850 		adev, simd, wave, thread,
1851 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1852 }
1853 
1854 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1855 				  u32 me, u32 pipe, u32 q, u32 vm)
1856 {
1857 	soc15_grbm_select(adev, me, pipe, q, vm);
1858 }
1859 
1860 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1861 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1862 	.select_se_sh = &gfx_v9_0_select_se_sh,
1863 	.read_wave_data = &gfx_v9_0_read_wave_data,
1864 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1865 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1866 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1867 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1868 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1869 };
1870 
1871 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1872 {
1873 	u32 gb_addr_config;
1874 	int err;
1875 
1876 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1877 
1878 	switch (adev->asic_type) {
1879 	case CHIP_VEGA10:
1880 		adev->gfx.config.max_hw_contexts = 8;
1881 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1885 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1886 		break;
1887 	case CHIP_VEGA12:
1888 		adev->gfx.config.max_hw_contexts = 8;
1889 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1894 		DRM_INFO("fix gfx.config for vega12\n");
1895 		break;
1896 	case CHIP_VEGA20:
1897 		adev->gfx.config.max_hw_contexts = 8;
1898 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1902 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1903 		gb_addr_config &= ~0xf3e777ff;
1904 		gb_addr_config |= 0x22014042;
1905 		/* check vbios table if gpu info is not available */
1906 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1907 		if (err)
1908 			return err;
1909 		break;
1910 	case CHIP_RAVEN:
1911 		adev->gfx.config.max_hw_contexts = 8;
1912 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1913 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1914 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1915 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1916 		if (adev->rev_id >= 8)
1917 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1918 		else
1919 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1920 		break;
1921 	case CHIP_ARCTURUS:
1922 		adev->gfx.config.max_hw_contexts = 8;
1923 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1924 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1925 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1926 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1927 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1928 		gb_addr_config &= ~0xf3e777ff;
1929 		gb_addr_config |= 0x22014042;
1930 		break;
1931 	case CHIP_RENOIR:
1932 		adev->gfx.config.max_hw_contexts = 8;
1933 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1934 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1935 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1936 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1937 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1938 		gb_addr_config &= ~0xf3e777ff;
1939 		gb_addr_config |= 0x22010042;
1940 		break;
1941 	default:
1942 		BUG();
1943 		break;
1944 	}
1945 
1946 	adev->gfx.config.gb_addr_config = gb_addr_config;
1947 
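	/* GB_ADDR_CONFIG fields are log2-encoded, so 1 << field yields the
	 * actual pipe/bank/RB/SE counts; PIPE_INTERLEAVE_SIZE works out to
	 * 256 bytes << field, i.e. 1 << (8 + field)
	 */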
1948 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1949 			REG_GET_FIELD(
1950 					adev->gfx.config.gb_addr_config,
1951 					GB_ADDR_CONFIG,
1952 					NUM_PIPES);
1953 
1954 	adev->gfx.config.max_tile_pipes =
1955 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1956 
1957 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1958 			REG_GET_FIELD(
1959 					adev->gfx.config.gb_addr_config,
1960 					GB_ADDR_CONFIG,
1961 					NUM_BANKS);
1962 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1963 			REG_GET_FIELD(
1964 					adev->gfx.config.gb_addr_config,
1965 					GB_ADDR_CONFIG,
1966 					MAX_COMPRESSED_FRAGS);
1967 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1968 			REG_GET_FIELD(
1969 					adev->gfx.config.gb_addr_config,
1970 					GB_ADDR_CONFIG,
1971 					NUM_RB_PER_SE);
1972 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1973 			REG_GET_FIELD(
1974 					adev->gfx.config.gb_addr_config,
1975 					GB_ADDR_CONFIG,
1976 					NUM_SHADER_ENGINES);
1977 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1978 			REG_GET_FIELD(
1979 					adev->gfx.config.gb_addr_config,
1980 					GB_ADDR_CONFIG,
1981 					PIPE_INTERLEAVE_SIZE));
1982 
1983 	return 0;
1984 }
1985 
1986 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1987 				      int mec, int pipe, int queue)
1988 {
1989 	int r;
1990 	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1994 
1995 	/* mec0 is me1 */
1996 	ring->me = mec + 1;
1997 	ring->pipe = pipe;
1998 	ring->queue = queue;
1999 
2000 	ring->ring_obj = NULL;
2001 	ring->use_doorbell = true;
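	/* doorbell_index values from adev->doorbell_index are assumed to be in
	 * 64-bit doorbell units while ring->doorbell_index is a 32-bit dword
	 * offset, hence the << 1
	 */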
2002 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2003 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2004 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2005 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2006 
2007 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2008 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2009 		+ ring->pipe;
2010 
2011 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2012 	r = amdgpu_ring_init(adev, ring, 1024,
2013 			     &adev->gfx.eop_irq, irq_type);
2014 	if (r)
2015 		return r;
2016 
2018 	return 0;
2019 }
2020 
2021 static int gfx_v9_0_sw_init(void *handle)
2022 {
2023 	int i, j, k, r, ring_id;
2024 	struct amdgpu_ring *ring;
2025 	struct amdgpu_kiq *kiq;
2026 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2027 
2028 	switch (adev->asic_type) {
2029 	case CHIP_VEGA10:
2030 	case CHIP_VEGA12:
2031 	case CHIP_VEGA20:
2032 	case CHIP_RAVEN:
2033 	case CHIP_ARCTURUS:
2034 	case CHIP_RENOIR:
2035 		adev->gfx.mec.num_mec = 2;
2036 		break;
2037 	default:
2038 		adev->gfx.mec.num_mec = 1;
2039 		break;
2040 	}
2041 
2042 	adev->gfx.mec.num_pipe_per_mec = 4;
2043 	adev->gfx.mec.num_queue_per_pipe = 8;
2044 
2045 	/* EOP Event */
2046 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2047 	if (r)
2048 		return r;
2049 
2050 	/* Privileged reg */
2051 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2052 			      &adev->gfx.priv_reg_irq);
2053 	if (r)
2054 		return r;
2055 
2056 	/* Privileged inst */
2057 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2058 			      &adev->gfx.priv_inst_irq);
2059 	if (r)
2060 		return r;
2061 
2062 	/* ECC error */
2063 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2064 			      &adev->gfx.cp_ecc_error_irq);
2065 	if (r)
2066 		return r;
2067 
2068 	/* FUE error */
2069 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2070 			      &adev->gfx.cp_ecc_error_irq);
2071 	if (r)
2072 		return r;
2073 
2074 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2075 
2076 	gfx_v9_0_scratch_init(adev);
2077 
2078 	r = gfx_v9_0_init_microcode(adev);
2079 	if (r) {
2080 		DRM_ERROR("Failed to load gfx firmware!\n");
2081 		return r;
2082 	}
2083 
2084 	r = adev->gfx.rlc.funcs->init(adev);
2085 	if (r) {
2086 		DRM_ERROR("Failed to init rlc BOs!\n");
2087 		return r;
2088 	}
2089 
2090 	r = gfx_v9_0_mec_init(adev);
2091 	if (r) {
2092 		DRM_ERROR("Failed to init MEC BOs!\n");
2093 		return r;
2094 	}
2095 
2096 	/* set up the gfx ring */
2097 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2098 		ring = &adev->gfx.gfx_ring[i];
2099 		ring->ring_obj = NULL;
2100 		if (!i)
2101 			sprintf(ring->name, "gfx");
2102 		else
2103 			sprintf(ring->name, "gfx_%d", i);
2104 		ring->use_doorbell = true;
2105 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2106 		r = amdgpu_ring_init(adev, ring, 1024,
2107 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2108 		if (r)
2109 			return r;
2110 	}
2111 
2112 	/* set up the compute queues - allocate horizontally across pipes */
2113 	ring_id = 0;
2114 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2115 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2116 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2117 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2118 					continue;
2119 
2120 				r = gfx_v9_0_compute_ring_init(adev,
2121 							       ring_id,
2122 							       i, k, j);
2123 				if (r)
2124 					return r;
2125 
2126 				ring_id++;
2127 			}
2128 		}
2129 	}
2130 
2131 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2132 	if (r) {
2133 		DRM_ERROR("Failed to init KIQ BOs!\n");
2134 		return r;
2135 	}
2136 
2137 	kiq = &adev->gfx.kiq;
2138 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2139 	if (r)
2140 		return r;
2141 
	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2143 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2144 	if (r)
2145 		return r;
2146 
2147 	adev->gfx.ce_ram_size = 0x8000;
2148 
2149 	r = gfx_v9_0_gpu_early_init(adev);
2150 	if (r)
2151 		return r;
2152 
2153 	return 0;
2154 }
2155 
2156 
2157 static int gfx_v9_0_sw_fini(void *handle)
2158 {
2159 	int i;
2160 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2161 
2162 	amdgpu_gfx_ras_fini(adev);
2163 
2164 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2165 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2166 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2167 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2168 
2169 	amdgpu_gfx_mqd_sw_fini(adev);
2170 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2171 	amdgpu_gfx_kiq_fini(adev);
2172 
2173 	gfx_v9_0_mec_fini(adev);
2174 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2175 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2176 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2177 				&adev->gfx.rlc.cp_table_gpu_addr,
2178 				(void **)&adev->gfx.rlc.cp_table_ptr);
2179 	}
2180 	gfx_v9_0_free_microcode(adev);
2181 
2182 	return 0;
2183 }
2184 
2185 
2186 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2187 {
2188 	/* TODO */
2189 }
2190 
2191 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2192 {
2193 	u32 data;
2194 
2195 	if (instance == 0xffffffff)
2196 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2197 	else
2198 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2199 
2200 	if (se_num == 0xffffffff)
2201 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2202 	else
2203 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2204 
2205 	if (sh_num == 0xffffffff)
2206 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2207 	else
2208 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2209 
2210 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2211 }
2212 
2213 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2214 {
2215 	u32 data, mask;
2216 
2217 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2218 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2219 
2220 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2221 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2222 
2223 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2224 					 adev->gfx.config.max_sh_per_se);
2225 
2226 	return (~data) & mask;
2227 }
2228 
2229 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2230 {
2231 	int i, j;
2232 	u32 data;
2233 	u32 active_rbs = 0;
2234 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2235 					adev->gfx.config.max_sh_per_se;
2236 
2237 	mutex_lock(&adev->grbm_idx_mutex);
2238 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2239 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2240 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2241 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2242 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2243 					       rb_bitmap_width_per_sh);
2244 		}
2245 	}
2246 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2247 	mutex_unlock(&adev->grbm_idx_mutex);
2248 
2249 	adev->gfx.config.backend_enable_mask = active_rbs;
2250 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2251 }
2252 
2253 #define DEFAULT_SH_MEM_BASES	(0x6000)
2254 #define FIRST_COMPUTE_VMID	(8)
2255 #define LAST_COMPUTE_VMID	(16)
2256 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2257 {
2258 	int i;
2259 	uint32_t sh_mem_config;
2260 	uint32_t sh_mem_bases;
2261 
2262 	/*
2263 	 * Configure apertures:
2264 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2265 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2266 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2267 	 */
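	/* 0x6000 goes into both the PRIVATE_BASE (low 16 bits) and SHARED_BASE
	 * (high 16 bits) fields of SH_MEM_BASES; the bases are presumably in
	 * units of 2^48, which yields the 0x6000xxxx'xxxxxxxx apertures above
	 */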
2268 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2269 
2270 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2271 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2272 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2273 
2274 	mutex_lock(&adev->srbm_mutex);
2275 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2276 		soc15_grbm_select(adev, 0, 0, 0, i);
2277 		/* CP and shaders */
2278 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2279 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2280 	}
2281 	soc15_grbm_select(adev, 0, 0, 0, 0);
2282 	mutex_unlock(&adev->srbm_mutex);
2283 
	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
2286 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2287 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2288 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2289 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2290 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2291 	}
2292 }
2293 
2294 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2295 {
2296 	int vmid;
2297 
2298 	/*
2299 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2300 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2301 	 * the driver can enable them for graphics. VMID0 should maintain
2302 	 * access so that HWS firmware can save/restore entries.
2303 	 */
2304 	for (vmid = 1; vmid < 16; vmid++) {
2305 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2306 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2307 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2308 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2309 	}
2310 }
2311 
2312 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2313 {
2314 	u32 tmp;
2315 	int i;
2316 
2317 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2318 
2319 	gfx_v9_0_tiling_mode_table_init(adev);
2320 
2321 	gfx_v9_0_setup_rb(adev);
2322 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2323 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2324 
2325 	/* XXX SH_MEM regs */
2326 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2327 	mutex_lock(&adev->srbm_mutex);
2328 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2329 		soc15_grbm_select(adev, 0, 0, 0, i);
2330 		/* CP and shaders */
2331 		if (i == 0) {
2332 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2333 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2334 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2335 					    !!amdgpu_noretry);
2336 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2337 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2338 		} else {
2339 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2340 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2341 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2342 					    !!amdgpu_noretry);
2343 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2344 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2345 				(adev->gmc.private_aperture_start >> 48));
2346 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2347 				(adev->gmc.shared_aperture_start >> 48));
2348 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2349 		}
2350 	}
2351 	soc15_grbm_select(adev, 0, 0, 0, 0);
2352 
2353 	mutex_unlock(&adev->srbm_mutex);
2354 
2355 	gfx_v9_0_init_compute_vmid(adev);
2356 	gfx_v9_0_init_gds_vmid(adev);
2357 }
2358 
2359 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2360 {
2361 	u32 i, j, k;
2362 	u32 mask;
2363 
2364 	mutex_lock(&adev->grbm_idx_mutex);
2365 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2366 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2367 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2368 			for (k = 0; k < adev->usec_timeout; k++) {
2369 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2370 					break;
2371 				udelay(1);
2372 			}
2373 			if (k == adev->usec_timeout) {
2374 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2375 						      0xffffffff, 0xffffffff);
2376 				mutex_unlock(&adev->grbm_idx_mutex);
2377 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2378 					 i, j);
2379 				return;
2380 			}
2381 		}
2382 	}
2383 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2384 	mutex_unlock(&adev->grbm_idx_mutex);
2385 
2386 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2387 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2388 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2389 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2390 	for (k = 0; k < adev->usec_timeout; k++) {
2391 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2392 			break;
2393 		udelay(1);
2394 	}
2395 }
2396 
2397 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2398 					       bool enable)
2399 {
2400 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2401 
2402 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2403 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2404 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2405 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2406 
2407 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2408 }
2409 
2410 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2411 {
2412 	/* csib */
2413 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2414 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2415 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2416 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2417 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2418 			adev->gfx.rlc.clear_state_size);
2419 }
2420 
2421 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2422 				int indirect_offset,
2423 				int list_size,
2424 				int *unique_indirect_regs,
2425 				int unique_indirect_reg_count,
2426 				int *indirect_start_offsets,
2427 				int *indirect_start_offsets_count,
2428 				int max_start_offsets_count)
2429 {
2430 	int idx;
2431 
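	/*
	 * Walk register_list_format from indirect_offset to list_size: each
	 * block's start is recorded in indirect_start_offsets and runs until a
	 * 0xFFFFFFFF terminator; within a block every third dword names an
	 * indirect register, which is collected into unique_indirect_regs.
	 */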
2432 	for (; indirect_offset < list_size; indirect_offset++) {
2433 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2434 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2435 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2436 
2437 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2438 			indirect_offset += 2;
2439 
			/* look for the matching index */
2441 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2442 				if (unique_indirect_regs[idx] ==
2443 					register_list_format[indirect_offset] ||
2444 					!unique_indirect_regs[idx])
2445 					break;
2446 			}
2447 
2448 			BUG_ON(idx >= unique_indirect_reg_count);
2449 
2450 			if (!unique_indirect_regs[idx])
2451 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2452 
2453 			indirect_offset++;
2454 		}
2455 	}
2456 }
2457 
2458 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2459 {
2460 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2461 	int unique_indirect_reg_count = 0;
2462 
2463 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2464 	int indirect_start_offsets_count = 0;
2465 
2466 	int list_size = 0;
2467 	int i = 0, j = 0;
2468 	u32 tmp = 0;
2469 
2470 	u32 *register_list_format =
2471 		kmemdup(adev->gfx.rlc.register_list_format,
2472 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2473 	if (!register_list_format)
2474 		return -ENOMEM;
2475 
2476 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2477 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2478 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2479 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2480 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2481 				    unique_indirect_regs,
2482 				    unique_indirect_reg_count,
2483 				    indirect_start_offsets,
2484 				    &indirect_start_offsets_count,
2485 				    ARRAY_SIZE(indirect_start_offsets));
2486 
2487 	/* enable auto inc in case it is disabled */
2488 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2489 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2490 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2491 
2492 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2493 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2494 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2495 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2496 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2497 			adev->gfx.rlc.register_restore[i]);
2498 
2499 	/* load indirect register */
2500 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2501 		adev->gfx.rlc.reg_list_format_start);
2502 
2503 	/* direct register portion */
2504 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2505 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2506 			register_list_format[i]);
2507 
2508 	/* indirect register portion */
2509 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2510 		if (register_list_format[i] == 0xFFFFFFFF) {
2511 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2512 			continue;
2513 		}
2514 
2515 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2516 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2517 
2518 		for (j = 0; j < unique_indirect_reg_count; j++) {
2519 			if (register_list_format[i] == unique_indirect_regs[j]) {
2520 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2521 				break;
2522 			}
2523 		}
2524 
2525 		BUG_ON(j >= unique_indirect_reg_count);
2526 
2527 		i++;
2528 	}
2529 
2530 	/* set save/restore list size */
2531 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2532 	list_size = list_size >> 1;
2533 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2534 		adev->gfx.rlc.reg_restore_list_size);
2535 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2536 
2537 	/* write the starting offsets to RLC scratch ram */
2538 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2539 		adev->gfx.rlc.starting_offsets_start);
2540 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2541 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2542 		       indirect_start_offsets[i]);
2543 
2544 	/* load unique indirect regs*/
2545 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2546 		if (unique_indirect_regs[i] != 0) {
2547 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2548 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2549 			       unique_indirect_regs[i] & 0x3FFFF);
2550 
2551 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2552 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2553 			       unique_indirect_regs[i] >> 20);
2554 		}
2555 	}
2556 
2557 	kfree(register_list_format);
2558 	return 0;
2559 }
2560 
2561 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2562 {
2563 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2564 }
2565 
2566 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2567 					     bool enable)
2568 {
2569 	uint32_t data = 0;
2570 	uint32_t default_data = 0;
2571 
2572 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
	if (enable) {
2574 		/* enable GFXIP control over CGPG */
2575 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2576 		if(default_data != data)
2577 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2578 
2579 		/* update status */
2580 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2581 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2582 		if(default_data != data)
2583 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2584 	} else {
		/* restore GFXIP control over CGPG */
2586 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2587 		if(default_data != data)
2588 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2589 	}
2590 }
2591 
2592 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2593 {
2594 	uint32_t data = 0;
2595 
2596 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2597 			      AMD_PG_SUPPORT_GFX_SMG |
2598 			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 0x60 */
2600 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2601 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2602 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2603 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2604 
2605 		/* init RLC PG Delay */
2606 		data = 0;
2607 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2608 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2609 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2610 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2611 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2612 
2613 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2614 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2615 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2616 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2617 
2618 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2619 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2620 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2621 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2622 
2623 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2624 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2625 
2626 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2627 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2628 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2629 
2630 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2631 	}
2632 }
2633 
2634 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2635 						bool enable)
2636 {
2637 	uint32_t data = 0;
2638 	uint32_t default_data = 0;
2639 
2640 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2641 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2642 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2643 			     enable ? 1 : 0);
2644 	if (default_data != data)
2645 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2646 }
2647 
2648 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2649 						bool enable)
2650 {
2651 	uint32_t data = 0;
2652 	uint32_t default_data = 0;
2653 
2654 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2655 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2656 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2657 			     enable ? 1 : 0);
2658 	if(default_data != data)
2659 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2660 }
2661 
2662 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2663 					bool enable)
2664 {
2665 	uint32_t data = 0;
2666 	uint32_t default_data = 0;
2667 
2668 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2669 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2670 			     CP_PG_DISABLE,
2671 			     enable ? 0 : 1);
2672 	if(default_data != data)
2673 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2674 }
2675 
2676 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2677 						bool enable)
2678 {
2679 	uint32_t data, default_data;
2680 
2681 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2682 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2683 			     GFX_POWER_GATING_ENABLE,
2684 			     enable ? 1 : 0);
2685 	if(default_data != data)
2686 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2687 }
2688 
2689 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2690 						bool enable)
2691 {
2692 	uint32_t data, default_data;
2693 
2694 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2695 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2696 			     GFX_PIPELINE_PG_ENABLE,
2697 			     enable ? 1 : 0);
2698 	if(default_data != data)
2699 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2700 
2701 	if (!enable)
2702 		/* read any GFX register to wake up GFX */
2703 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2704 }
2705 
2706 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2707 						       bool enable)
2708 {
2709 	uint32_t data, default_data;
2710 
2711 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2712 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2713 			     STATIC_PER_CU_PG_ENABLE,
2714 			     enable ? 1 : 0);
2715 	if(default_data != data)
2716 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2717 }
2718 
2719 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2720 						bool enable)
2721 {
2722 	uint32_t data, default_data;
2723 
2724 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2725 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2726 			     DYN_PER_CU_PG_ENABLE,
2727 			     enable ? 1 : 0);
2728 	if(default_data != data)
2729 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2730 }
2731 
2732 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2733 {
2734 	gfx_v9_0_init_csb(adev);
2735 
2736 	/*
	 * The RLC save/restore list is only usable from RLC ucode v2.1
	 * onwards, and it is required by the gfxoff feature.
2739 	 */
2740 	if (adev->gfx.rlc.is_rlc_v2_1) {
2741 		if (adev->asic_type == CHIP_VEGA12 ||
2742 		    (adev->asic_type == CHIP_RAVEN &&
2743 		     adev->rev_id >= 8))
2744 			gfx_v9_1_init_rlc_save_restore_list(adev);
2745 		gfx_v9_0_enable_save_restore_machine(adev);
2746 	}
2747 
2748 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2749 			      AMD_PG_SUPPORT_GFX_SMG |
2750 			      AMD_PG_SUPPORT_GFX_DMG |
2751 			      AMD_PG_SUPPORT_CP |
2752 			      AMD_PG_SUPPORT_GDS |
2753 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2754 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2755 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2756 		gfx_v9_0_init_gfx_power_gating(adev);
2757 	}
2758 }
2759 
2760 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2761 {
2762 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2763 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2764 	gfx_v9_0_wait_for_rlc_serdes(adev);
2765 }
2766 
2767 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2768 {
2769 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2770 	udelay(50);
2771 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2772 	udelay(50);
2773 }
2774 
2775 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2776 {
2777 #ifdef AMDGPU_RLC_DEBUG_RETRY
2778 	u32 rlc_ucode_ver;
2779 #endif
2780 
2781 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2782 	udelay(50);
2783 
	/* APUs (e.g. carrizo) enable the CP interrupt only after CP is initialized */
2785 	if (!(adev->flags & AMD_IS_APU)) {
2786 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2787 		udelay(50);
2788 	}
2789 
2790 #ifdef AMDGPU_RLC_DEBUG_RETRY
2791 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2792 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2793 	if(rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2795 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2796 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2797 		 * default is 0x9C4 to create a 100us interval */
2798 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2799 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2800 		 * to disable the page fault retry interrupts, default is
2801 		 * 0x100 (256) */
2802 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2803 	}
2804 #endif
2805 }
2806 
2807 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2808 {
2809 	const struct rlc_firmware_header_v2_0 *hdr;
2810 	const __le32 *fw_data;
2811 	unsigned i, fw_size;
2812 
2813 	if (!adev->gfx.rlc_fw)
2814 		return -EINVAL;
2815 
2816 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2817 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2818 
2819 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2820 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2821 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2822 
2823 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2824 			RLCG_UCODE_LOADING_START_ADDRESS);
2825 	for (i = 0; i < fw_size; i++)
2826 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2827 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2828 
2829 	return 0;
2830 }
2831 
2832 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2833 {
2834 	int r;
2835 
2836 	if (amdgpu_sriov_vf(adev)) {
2837 		gfx_v9_0_init_csb(adev);
2838 		return 0;
2839 	}
2840 
2841 	adev->gfx.rlc.funcs->stop(adev);
2842 
2843 	/* disable CG */
2844 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2845 
2846 	gfx_v9_0_init_pg(adev);
2847 
2848 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2849 		/* legacy rlc firmware loading */
2850 		r = gfx_v9_0_rlc_load_microcode(adev);
2851 		if (r)
2852 			return r;
2853 	}
2854 
2855 	switch (adev->asic_type) {
2856 	case CHIP_RAVEN:
2857 		if (amdgpu_lbpw == 0)
2858 			gfx_v9_0_enable_lbpw(adev, false);
2859 		else
2860 			gfx_v9_0_enable_lbpw(adev, true);
2861 		break;
2862 	case CHIP_VEGA20:
2863 		if (amdgpu_lbpw > 0)
2864 			gfx_v9_0_enable_lbpw(adev, true);
2865 		else
2866 			gfx_v9_0_enable_lbpw(adev, false);
2867 		break;
2868 	default:
2869 		break;
2870 	}
2871 
2872 	adev->gfx.rlc.funcs->start(adev);
2873 
2874 	return 0;
2875 }
2876 
2877 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2878 {
2879 	int i;
2880 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2881 
2882 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2883 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2884 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2885 	if (!enable) {
2886 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2887 			adev->gfx.gfx_ring[i].sched.ready = false;
2888 	}
2889 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2890 	udelay(50);
2891 }
2892 
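/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP, then write the
 * PFP, CE and ME firmware images word by word through their UCODE/RAM
 * data registers and latch the respective firmware versions.
 */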
2893 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2894 {
2895 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2896 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2897 	const struct gfx_firmware_header_v1_0 *me_hdr;
2898 	const __le32 *fw_data;
2899 	unsigned i, fw_size;
2900 
2901 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2902 		return -EINVAL;
2903 
2904 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2905 		adev->gfx.pfp_fw->data;
2906 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2907 		adev->gfx.ce_fw->data;
2908 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2909 		adev->gfx.me_fw->data;
2910 
2911 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2912 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2913 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2914 
2915 	gfx_v9_0_cp_gfx_enable(adev, false);
2916 
2917 	/* PFP */
2918 	fw_data = (const __le32 *)
2919 		(adev->gfx.pfp_fw->data +
2920 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2921 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2922 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2923 	for (i = 0; i < fw_size; i++)
2924 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2925 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2926 
2927 	/* CE */
2928 	fw_data = (const __le32 *)
2929 		(adev->gfx.ce_fw->data +
2930 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2931 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2932 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2933 	for (i = 0; i < fw_size; i++)
2934 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2935 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2936 
2937 	/* ME */
2938 	fw_data = (const __le32 *)
2939 		(adev->gfx.me_fw->data +
2940 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2941 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2942 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2943 	for (i = 0; i < fw_size; i++)
2944 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2945 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2946 
2947 	return 0;
2948 }
2949 
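/*
 * Bring up the gfx CP: program CP_MAX_CONTEXT and CP_DEVICE_ID, un-halt
 * the CP, and emit the clear-state (CSB) sequence and initial context
 * control packets on the gfx ring.
 */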
2950 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2951 {
2952 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2953 	const struct cs_section_def *sect = NULL;
2954 	const struct cs_extent_def *ext = NULL;
2955 	int r, i, tmp;
2956 
2957 	/* init the CP */
2958 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2959 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2960 
2961 	gfx_v9_0_cp_gfx_enable(adev, true);
2962 
2963 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2964 	if (r) {
2965 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2966 		return r;
2967 	}
2968 
2969 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2970 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2971 
2972 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2973 	amdgpu_ring_write(ring, 0x80000000);
2974 	amdgpu_ring_write(ring, 0x80000000);
2975 
2976 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2977 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2978 			if (sect->id == SECT_CONTEXT) {
2979 				amdgpu_ring_write(ring,
2980 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2981 					       ext->reg_count));
2982 				amdgpu_ring_write(ring,
2983 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2984 				for (i = 0; i < ext->reg_count; i++)
2985 					amdgpu_ring_write(ring, ext->extent[i]);
2986 			}
2987 		}
2988 	}
2989 
2990 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2991 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2992 
2993 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2994 	amdgpu_ring_write(ring, 0);
2995 
2996 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2997 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2998 	amdgpu_ring_write(ring, 0x8000);
2999 	amdgpu_ring_write(ring, 0x8000);
3000 
3001 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3002 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3003 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3004 	amdgpu_ring_write(ring, tmp);
3005 	amdgpu_ring_write(ring, 0);
3006 
3007 	amdgpu_ring_commit(ring);
3008 
3009 	return 0;
3010 }
3011 
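/*
 * Program the gfx ring buffer: size, rptr/wptr write-back addresses, ring
 * base address and doorbell range, then start the ring through
 * gfx_v9_0_cp_gfx_start().
 */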
3012 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3013 {
3014 	struct amdgpu_ring *ring;
3015 	u32 tmp;
3016 	u32 rb_bufsz;
3017 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3018 
3019 	/* Set the write pointer delay */
3020 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3021 
3022 	/* set the RB to use vmid 0 */
3023 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3024 
3025 	/* Set ring buffer size */
3026 	ring = &adev->gfx.gfx_ring[0];
3027 	rb_bufsz = order_base_2(ring->ring_size / 8);
3028 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3029 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3030 #ifdef __BIG_ENDIAN
3031 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3032 #endif
3033 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3034 
3035 	/* Initialize the ring buffer's write pointers */
3036 	ring->wptr = 0;
3037 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3038 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3039 
3040 	/* set the wb address whether it's enabled or not */
3041 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3042 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3043 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3044 
3045 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3046 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3047 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3048 
3049 	mdelay(1);
3050 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3051 
3052 	rb_addr = ring->gpu_addr >> 8;
3053 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3054 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3055 
3056 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3057 	if (ring->use_doorbell) {
3058 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3059 				    DOORBELL_OFFSET, ring->doorbell_index);
3060 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3061 				    DOORBELL_EN, 1);
3062 	} else {
3063 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3064 	}
3065 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3066 
3067 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3068 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3069 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3070 
3071 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3072 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3073 
3074 
3075 	/* start the ring */
3076 	gfx_v9_0_cp_gfx_start(adev);
3077 	ring->sched.ready = true;
3078 
3079 	return 0;
3080 }
3081 
3082 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3083 {
3084 	int i;
3085 
3086 	if (enable) {
3087 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3088 	} else {
3089 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3090 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3091 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3092 			adev->gfx.compute_ring[i].sched.ready = false;
3093 		adev->gfx.kiq.ring.sched.ready = false;
3094 	}
3095 	udelay(50);
3096 }
3097 
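/*
 * Legacy (non-PSP) MEC microcode load: halt the compute MEs, point the
 * CPC instruction cache at the MEC firmware buffer and write the MEC1
 * jump table through CP_MEC_ME1_UCODE_DATA.
 */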
3098 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3099 {
3100 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3101 	const __le32 *fw_data;
3102 	unsigned i;
3103 	u32 tmp;
3104 
3105 	if (!adev->gfx.mec_fw)
3106 		return -EINVAL;
3107 
3108 	gfx_v9_0_cp_compute_enable(adev, false);
3109 
3110 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3111 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3112 
3113 	fw_data = (const __le32 *)
3114 		(adev->gfx.mec_fw->data +
3115 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3116 	tmp = 0;
3117 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3118 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3119 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3120 
3121 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3122 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3123 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3124 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3125 
3126 	/* MEC1 */
3127 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3128 			 mec_hdr->jt_offset);
3129 	for (i = 0; i < mec_hdr->jt_size; i++)
3130 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3131 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3132 
3133 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3134 			adev->gfx.mec_fw_version);
3135 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3136 
3137 	return 0;
3138 }
3139 
3140 /* KIQ functions */
3141 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3142 {
3143 	uint32_t tmp;
3144 	struct amdgpu_device *adev = ring->adev;
3145 
3146 	/* tell the RLC which queue is the KIQ queue */
3147 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3148 	tmp &= 0xffffff00;
3149 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3150 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3151 	tmp |= 0x80;
3152 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3153 }
3154 
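/*
 * Bring the compute queues online through the KIQ: build the queue mask
 * from the MEC queue bitmap, emit a SET_RESOURCES packet followed by one
 * MAP_QUEUES packet per compute ring, then ring-test the KIQ.
 */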
3155 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3156 {
3157 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3158 	uint64_t queue_mask = 0;
3159 	int r, i;
3160 
3161 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3162 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3163 			continue;
3164 
3165 		/* This situation may be hit in the future if a new HW
3166 		 * generation exposes more than 64 queues. If so, the
3167 		 * definition of queue_mask needs updating */
3168 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3169 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3170 			break;
3171 		}
3172 
3173 		queue_mask |= (1ull << i);
3174 	}
3175 
3176 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3177 	if (r) {
3178 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3179 		return r;
3180 	}
3181 
3182 	/* set resources */
3183 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3184 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3185 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3186 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3187 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3188 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3189 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3190 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3191 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3192 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3193 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3194 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3195 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3196 
3197 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3198 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3199 		amdgpu_ring_write(kiq_ring,
3200 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3201 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3202 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3203 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3204 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3205 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3206 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3207 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3208 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3209 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3210 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3211 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3212 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3213 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3214 	}
3215 
3216 	r = amdgpu_ring_test_helper(kiq_ring);
3217 	if (r)
3218 		DRM_ERROR("KCQ enable failed\n");
3219 
3220 	return r;
3221 }
3222 
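/*
 * Fill the memory queue descriptor (MQD) for a compute/KIQ ring: EOP
 * buffer, doorbell control, MQD and HQD base addresses, rptr/wptr
 * write-back addresses and the HQD control word, and mark the queue
 * active.
 */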
3223 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3224 {
3225 	struct amdgpu_device *adev = ring->adev;
3226 	struct v9_mqd *mqd = ring->mqd_ptr;
3227 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3228 	uint32_t tmp;
3229 
3230 	mqd->header = 0xC0310800;
3231 	mqd->compute_pipelinestat_enable = 0x00000001;
3232 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3233 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3234 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3235 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3236 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3237 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3238 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3239 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3240 	mqd->compute_misc_reserved = 0x00000003;
3241 
3242 	mqd->dynamic_cu_mask_addr_lo =
3243 		lower_32_bits(ring->mqd_gpu_addr
3244 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3245 	mqd->dynamic_cu_mask_addr_hi =
3246 		upper_32_bits(ring->mqd_gpu_addr
3247 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3248 
3249 	eop_base_addr = ring->eop_gpu_addr >> 8;
3250 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3251 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3252 
3253 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3254 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3255 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3256 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3257 
3258 	mqd->cp_hqd_eop_control = tmp;
3259 
3260 	/* enable doorbell? */
3261 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3262 
3263 	if (ring->use_doorbell) {
3264 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3265 				    DOORBELL_OFFSET, ring->doorbell_index);
3266 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3267 				    DOORBELL_EN, 1);
3268 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3269 				    DOORBELL_SOURCE, 0);
3270 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3271 				    DOORBELL_HIT, 0);
3272 	} else {
3273 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3274 					 DOORBELL_EN, 0);
3275 	}
3276 
3277 	mqd->cp_hqd_pq_doorbell_control = tmp;
3278 
3279 	/* disable the queue if it's active */
3280 	ring->wptr = 0;
3281 	mqd->cp_hqd_dequeue_request = 0;
3282 	mqd->cp_hqd_pq_rptr = 0;
3283 	mqd->cp_hqd_pq_wptr_lo = 0;
3284 	mqd->cp_hqd_pq_wptr_hi = 0;
3285 
3286 	/* set the pointer to the MQD */
3287 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3288 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3289 
3290 	/* set MQD vmid to 0 */
3291 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3292 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3293 	mqd->cp_mqd_control = tmp;
3294 
3295 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3296 	hqd_gpu_addr = ring->gpu_addr >> 8;
3297 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3298 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3299 
3300 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3301 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3302 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3303 			    (order_base_2(ring->ring_size / 4) - 1));
3304 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3305 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3306 #ifdef __BIG_ENDIAN
3307 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3308 #endif
3309 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3310 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3311 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3312 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3313 	mqd->cp_hqd_pq_control = tmp;
3314 
3315 	/* set the wb address whether it's enabled or not */
3316 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3317 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3318 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3319 		upper_32_bits(wb_gpu_addr) & 0xffff;
3320 
3321 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3322 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3323 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3324 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3325 
3326 	tmp = 0;
3327 	/* enable the doorbell if requested */
3328 	if (ring->use_doorbell) {
3329 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3330 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3331 				DOORBELL_OFFSET, ring->doorbell_index);
3332 
3333 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3334 					 DOORBELL_EN, 1);
3335 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3336 					 DOORBELL_SOURCE, 0);
3337 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3338 					 DOORBELL_HIT, 0);
3339 	}
3340 
3341 	mqd->cp_hqd_pq_doorbell_control = tmp;
3342 
3343 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3344 	ring->wptr = 0;
3345 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3346 
3347 	/* set the vmid for the queue */
3348 	mqd->cp_hqd_vmid = 0;
3349 
3350 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3351 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3352 	mqd->cp_hqd_persistent_state = tmp;
3353 
3354 	/* set MIN_IB_AVAIL_SIZE */
3355 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3356 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3357 	mqd->cp_hqd_ib_control = tmp;
3358 
3359 	/* activate the queue */
3360 	mqd->cp_hqd_active = 1;
3361 
3362 	return 0;
3363 }
3364 
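/*
 * Program the KIQ hardware queue descriptor registers directly from the
 * prepared MQD, draining any currently active queue first and setting up
 * the MEC doorbell range when doorbells are in use.
 */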
3365 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3366 {
3367 	struct amdgpu_device *adev = ring->adev;
3368 	struct v9_mqd *mqd = ring->mqd_ptr;
3369 	int j;
3370 
3371 	/* disable wptr polling */
3372 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3373 
3374 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3375 	       mqd->cp_hqd_eop_base_addr_lo);
3376 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3377 	       mqd->cp_hqd_eop_base_addr_hi);
3378 
3379 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3380 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3381 	       mqd->cp_hqd_eop_control);
3382 
3383 	/* enable doorbell? */
3384 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3385 	       mqd->cp_hqd_pq_doorbell_control);
3386 
3387 	/* disable the queue if it's active */
3388 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3389 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3390 		for (j = 0; j < adev->usec_timeout; j++) {
3391 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3392 				break;
3393 			udelay(1);
3394 		}
3395 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3396 		       mqd->cp_hqd_dequeue_request);
3397 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3398 		       mqd->cp_hqd_pq_rptr);
3399 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3400 		       mqd->cp_hqd_pq_wptr_lo);
3401 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3402 		       mqd->cp_hqd_pq_wptr_hi);
3403 	}
3404 
3405 	/* set the pointer to the MQD */
3406 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3407 	       mqd->cp_mqd_base_addr_lo);
3408 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3409 	       mqd->cp_mqd_base_addr_hi);
3410 
3411 	/* set MQD vmid to 0 */
3412 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3413 	       mqd->cp_mqd_control);
3414 
3415 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3416 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3417 	       mqd->cp_hqd_pq_base_lo);
3418 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3419 	       mqd->cp_hqd_pq_base_hi);
3420 
3421 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3422 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3423 	       mqd->cp_hqd_pq_control);
3424 
3425 	/* set the wb address whether it's enabled or not */
3426 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3427 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3428 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3429 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3430 
3431 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3432 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3433 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3434 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3435 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3436 
3437 	/* enable the doorbell if requested */
3438 	if (ring->use_doorbell) {
3439 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3440 					(adev->doorbell_index.kiq * 2) << 2);
3441 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3442 					(adev->doorbell_index.userqueue_end * 2) << 2);
3443 	}
3444 
3445 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3446 	       mqd->cp_hqd_pq_doorbell_control);
3447 
3448 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3449 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3450 	       mqd->cp_hqd_pq_wptr_lo);
3451 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3452 	       mqd->cp_hqd_pq_wptr_hi);
3453 
3454 	/* set the vmid for the queue */
3455 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3456 
3457 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3458 	       mqd->cp_hqd_persistent_state);
3459 
3460 	/* activate the queue */
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3462 	       mqd->cp_hqd_active);
3463 
3464 	if (ring->use_doorbell)
3465 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3466 
3467 	return 0;
3468 }
3469 
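/*
 * Tear down the KIQ hardware queue: request a dequeue and wait for
 * CP_HQD_ACTIVE to clear (forcing it off on timeout), then reset the HQD
 * doorbell, rptr and wptr registers.
 */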
3470 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3471 {
3472 	struct amdgpu_device *adev = ring->adev;
3473 	int j;
3474 
3475 	/* disable the queue if it's active */
3476 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3477 
3478 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3479 
3480 		for (j = 0; j < adev->usec_timeout; j++) {
3481 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3482 				break;
3483 			udelay(1);
3484 		}
3485 
3486 		if (j == adev->usec_timeout) {
3487 			DRM_DEBUG("KIQ dequeue request failed.\n");
3488 
3489 			/* Manual disable if dequeue request times out */
3490 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3491 		}
3492 
3493 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3494 		      0);
3495 	}
3496 
3497 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3498 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3499 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3500 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3501 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3502 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3503 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3504 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3505 
3506 	return 0;
3507 }
3508 
3509 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3510 {
3511 	struct amdgpu_device *adev = ring->adev;
3512 	struct v9_mqd *mqd = ring->mqd_ptr;
3513 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3514 
3515 	gfx_v9_0_kiq_setting(ring);
3516 
3517 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3518 		/* reset MQD to a clean status */
3519 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3520 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3521 
3522 		/* reset ring buffer */
3523 		ring->wptr = 0;
3524 		amdgpu_ring_clear_ring(ring);
3525 
3526 		mutex_lock(&adev->srbm_mutex);
3527 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3528 		gfx_v9_0_kiq_init_register(ring);
3529 		soc15_grbm_select(adev, 0, 0, 0, 0);
3530 		mutex_unlock(&adev->srbm_mutex);
3531 	} else {
3532 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3533 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3534 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3535 		mutex_lock(&adev->srbm_mutex);
3536 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3537 		gfx_v9_0_mqd_init(ring);
3538 		gfx_v9_0_kiq_init_register(ring);
3539 		soc15_grbm_select(adev, 0, 0, 0, 0);
3540 		mutex_unlock(&adev->srbm_mutex);
3541 
3542 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3543 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3544 	}
3545 
3546 	return 0;
3547 }
3548 
3549 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3550 {
3551 	struct amdgpu_device *adev = ring->adev;
3552 	struct v9_mqd *mqd = ring->mqd_ptr;
3553 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3554 
3555 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3556 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3557 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3558 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3559 		mutex_lock(&adev->srbm_mutex);
3560 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3561 		gfx_v9_0_mqd_init(ring);
3562 		soc15_grbm_select(adev, 0, 0, 0, 0);
3563 		mutex_unlock(&adev->srbm_mutex);
3564 
3565 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3566 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3567 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3568 		/* reset MQD to a clean status */
3569 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3570 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3571 
3572 		/* reset ring buffer */
3573 		ring->wptr = 0;
3574 		amdgpu_ring_clear_ring(ring);
3575 	} else {
3576 		amdgpu_ring_clear_ring(ring);
3577 	}
3578 
3579 	return 0;
3580 }
3581 
3582 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3583 {
3584 	struct amdgpu_ring *ring;
3585 	int r;
3586 
3587 	ring = &adev->gfx.kiq.ring;
3588 
3589 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3590 	if (unlikely(r != 0))
3591 		return r;
3592 
3593 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3594 	if (unlikely(r != 0))
3595 		return r;
3596 
3597 	gfx_v9_0_kiq_init_queue(ring);
3598 	amdgpu_bo_kunmap(ring->mqd_obj);
3599 	ring->mqd_ptr = NULL;
3600 	amdgpu_bo_unreserve(ring->mqd_obj);
3601 	ring->sched.ready = true;
3602 	return 0;
3603 }
3604 
3605 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3606 {
3607 	struct amdgpu_ring *ring = NULL;
3608 	int r = 0, i;
3609 
3610 	gfx_v9_0_cp_compute_enable(adev, true);
3611 
3612 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3613 		ring = &adev->gfx.compute_ring[i];
3614 
3615 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3616 		if (unlikely(r != 0))
3617 			goto done;
3618 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3619 		if (!r) {
3620 			r = gfx_v9_0_kcq_init_queue(ring);
3621 			amdgpu_bo_kunmap(ring->mqd_obj);
3622 			ring->mqd_ptr = NULL;
3623 		}
3624 		amdgpu_bo_unreserve(ring->mqd_obj);
3625 		if (r)
3626 			goto done;
3627 	}
3628 
3629 	r = gfx_v9_0_kiq_kcq_enable(adev);
3630 done:
3631 	return r;
3632 }
3633 
3634 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3635 {
3636 	int r, i;
3637 	struct amdgpu_ring *ring;
3638 
3639 	if (!(adev->flags & AMD_IS_APU))
3640 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3641 
3642 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3643 		if (adev->asic_type != CHIP_ARCTURUS) {
3644 			/* legacy firmware loading */
3645 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3646 			if (r)
3647 				return r;
3648 		}
3649 
3650 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3651 		if (r)
3652 			return r;
3653 	}
3654 
3655 	r = gfx_v9_0_kiq_resume(adev);
3656 	if (r)
3657 		return r;
3658 
3659 	if (adev->asic_type != CHIP_ARCTURUS) {
3660 		r = gfx_v9_0_cp_gfx_resume(adev);
3661 		if (r)
3662 			return r;
3663 	}
3664 
3665 	r = gfx_v9_0_kcq_resume(adev);
3666 	if (r)
3667 		return r;
3668 
3669 	if (adev->asic_type != CHIP_ARCTURUS) {
3670 		ring = &adev->gfx.gfx_ring[0];
3671 		r = amdgpu_ring_test_helper(ring);
3672 		if (r)
3673 			return r;
3674 	}
3675 
3676 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3677 		ring = &adev->gfx.compute_ring[i];
3678 		amdgpu_ring_test_helper(ring);
3679 	}
3680 
3681 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3682 
3683 	return 0;
3684 }
3685 
3686 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3687 {
3688 	if (adev->asic_type != CHIP_ARCTURUS)
3689 		gfx_v9_0_cp_gfx_enable(adev, enable);
3690 	gfx_v9_0_cp_compute_enable(adev, enable);
3691 }
3692 
3693 static int gfx_v9_0_hw_init(void *handle)
3694 {
3695 	int r;
3696 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3697 
3698 	if (!amdgpu_sriov_vf(adev))
3699 		gfx_v9_0_init_golden_registers(adev);
3700 
3701 	gfx_v9_0_constants_init(adev);
3702 
3703 	r = gfx_v9_0_csb_vram_pin(adev);
3704 	if (r)
3705 		return r;
3706 
3707 	r = adev->gfx.rlc.funcs->resume(adev);
3708 	if (r)
3709 		return r;
3710 
3711 	r = gfx_v9_0_cp_resume(adev);
3712 	if (r)
3713 		return r;
3714 
3715 	return r;
3716 }
3717 
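/*
 * Unmap all compute queues through the KIQ: emit one UNMAP_QUEUES packet
 * (RESET_QUEUES action) per compute ring and ring-test the KIQ to make
 * sure the packets were consumed.
 */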
3718 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3719 {
3720 	int r, i;
3721 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3722 
3723 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3724 	if (r)
3725 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3726 
3727 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3728 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3729 
3730 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3731 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3732 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3733 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3734 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3735 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3736 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3737 		amdgpu_ring_write(kiq_ring, 0);
3738 		amdgpu_ring_write(kiq_ring, 0);
3739 		amdgpu_ring_write(kiq_ring, 0);
3740 	}
3741 	r = amdgpu_ring_test_helper(kiq_ring);
3742 	if (r)
3743 		DRM_ERROR("KCQ disable failed\n");
3744 
3745 	return r;
3746 }
3747 
3748 static int gfx_v9_0_hw_fini(void *handle)
3749 {
3750 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3751 
3752 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3753 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3754 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3755 
3756 	/* with a RAS fatal interrupt triggered, DF is frozen and kcq disable would fail */
3757 	if (!amdgpu_ras_intr_triggered())
3758 		/* disable KCQ so the CPC stops touching memory that will no longer be valid */
3759 		gfx_v9_0_kcq_disable(adev);
3760 
3761 	if (amdgpu_sriov_vf(adev)) {
3762 		gfx_v9_0_cp_gfx_enable(adev, false);
3763 		/* polling must be disabled for SRIOV once the hw is finished,
3764 		 * otherwise the CPC engine may keep fetching a WB address that
3765 		 * is no longer valid after the sw teardown and trigger a DMAR
3766 		 * read error on the hypervisor side.
3767 		 */
3768 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3769 		return 0;
3770 	}
3771 
3772 	/* Use the deinitialize sequence from CAIL when unbinding the device
3773 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3774 	 */
3775 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3776 		mutex_lock(&adev->srbm_mutex);
3777 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3778 				adev->gfx.kiq.ring.pipe,
3779 				adev->gfx.kiq.ring.queue, 0);
3780 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3781 		soc15_grbm_select(adev, 0, 0, 0, 0);
3782 		mutex_unlock(&adev->srbm_mutex);
3783 	}
3784 
3785 	gfx_v9_0_cp_enable(adev, false);
3786 	adev->gfx.rlc.funcs->stop(adev);
3787 
3788 	gfx_v9_0_csb_vram_unpin(adev);
3789 
3790 	return 0;
3791 }
3792 
3793 static int gfx_v9_0_suspend(void *handle)
3794 {
3795 	return gfx_v9_0_hw_fini(handle);
3796 }
3797 
3798 static int gfx_v9_0_resume(void *handle)
3799 {
3800 	return gfx_v9_0_hw_init(handle);
3801 }
3802 
3803 static bool gfx_v9_0_is_idle(void *handle)
3804 {
3805 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3806 
3807 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3808 				GRBM_STATUS, GUI_ACTIVE))
3809 		return false;
3810 	else
3811 		return true;
3812 }
3813 
3814 static int gfx_v9_0_wait_for_idle(void *handle)
3815 {
3816 	unsigned i;
3817 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3818 
3819 	for (i = 0; i < adev->usec_timeout; i++) {
3820 		if (gfx_v9_0_is_idle(handle))
3821 			return 0;
3822 		udelay(1);
3823 	}
3824 	return -ETIMEDOUT;
3825 }
3826 
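/*
 * Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if any are found,
 * stop the RLC, halt the gfx and compute CPs and pulse the corresponding
 * GRBM_SOFT_RESET bits.
 */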
3827 static int gfx_v9_0_soft_reset(void *handle)
3828 {
3829 	u32 grbm_soft_reset = 0;
3830 	u32 tmp;
3831 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832 
3833 	/* GRBM_STATUS */
3834 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3835 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3836 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3837 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3838 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3839 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3840 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3841 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3844 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3845 	}
3846 
3847 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3848 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3850 	}
3851 
3852 	/* GRBM_STATUS2 */
3853 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3854 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3855 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3856 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3857 
3858 
3859 	if (grbm_soft_reset) {
3860 		/* stop the rlc */
3861 		adev->gfx.rlc.funcs->stop(adev);
3862 
3863 		if (adev->asic_type != CHIP_ARCTURUS)
3864 			/* Disable GFX parsing/prefetching */
3865 			gfx_v9_0_cp_gfx_enable(adev, false);
3866 
3867 		/* Disable MEC parsing/prefetching */
3868 		gfx_v9_0_cp_compute_enable(adev, false);
3869 
3870 		if (grbm_soft_reset) {
3871 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3872 			tmp |= grbm_soft_reset;
3873 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3874 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3875 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3876 
3877 			udelay(50);
3878 
3879 			tmp &= ~grbm_soft_reset;
3880 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3882 		}
3883 
3884 		/* Wait a little for things to settle down */
3885 		udelay(50);
3886 	}
3887 	return 0;
3888 }
3889 
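/*
 * Read the 64-bit GPU clock counter. On Vega10 under SRIOV runtime the
 * RLC reference-clock timestamp registers are sampled with an MSB
 * consistency check; otherwise the count is captured via
 * RLC_CAPTURE_GPU_CLOCK_COUNT and read from the LSB/MSB registers.
 */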
3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3891 {
3892 	uint64_t clock;
3893 
3894 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3895 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3896 		uint32_t tmp, lsb, msb, i = 0;
3897 		do {
3898 			if (i != 0)
3899 				udelay(1);
3900 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3901 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3902 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3903 			i++;
3904 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3905 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3906 	} else {
3907 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3908 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3909 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3910 	}
3911 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3912 	return clock;
3913 }
3914 
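/*
 * Emit register writes on the ring to point the per-VMID GDS base/size,
 * GWS and OA allocations at the values reserved for this VMID.
 */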
3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3916 					  uint32_t vmid,
3917 					  uint32_t gds_base, uint32_t gds_size,
3918 					  uint32_t gws_base, uint32_t gws_size,
3919 					  uint32_t oa_base, uint32_t oa_size)
3920 {
3921 	struct amdgpu_device *adev = ring->adev;
3922 
3923 	/* GDS Base */
3924 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3925 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3926 				   gds_base);
3927 
3928 	/* GDS Size */
3929 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3930 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3931 				   gds_size);
3932 
3933 	/* GWS */
3934 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3935 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3936 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3937 
3938 	/* OA */
3939 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3940 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3941 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3942 }
3943 
3944 static const u32 vgpr_init_compute_shader[] =
3945 {
3946 	0xb07c0000, 0xbe8000ff,
3947 	0x000000f8, 0xbf110800,
3948 	0x7e000280, 0x7e020280,
3949 	0x7e040280, 0x7e060280,
3950 	0x7e080280, 0x7e0a0280,
3951 	0x7e0c0280, 0x7e0e0280,
3952 	0x80808800, 0xbe803200,
3953 	0xbf84fff5, 0xbf9c0000,
3954 	0xd28c0001, 0x0001007f,
3955 	0xd28d0001, 0x0002027e,
3956 	0x10020288, 0xb8810904,
3957 	0xb7814000, 0xd1196a01,
3958 	0x00000301, 0xbe800087,
3959 	0xbefc00c1, 0xd89c4000,
3960 	0x00020201, 0xd89cc080,
3961 	0x00040401, 0x320202ff,
3962 	0x00000800, 0x80808100,
3963 	0xbf84fff8, 0x7e020280,
3964 	0xbf810000, 0x00000000,
3965 };
3966 
3967 static const u32 sgpr_init_compute_shader[] =
3968 {
3969 	0xb07c0000, 0xbe8000ff,
3970 	0x0000005f, 0xbee50080,
3971 	0xbe812c65, 0xbe822c65,
3972 	0xbe832c65, 0xbe842c65,
3973 	0xbe852c65, 0xb77c0005,
3974 	0x80808500, 0xbf84fff8,
3975 	0xbe800080, 0xbf810000,
3976 };
3977 
3978 static const struct soc15_reg_entry vgpr_init_regs[] = {
3979    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3980    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3981    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3982    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3983    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3984    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3989 };
3990 
3991 static const struct soc15_reg_entry sgpr_init_regs[] = {
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3995    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3996    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3997    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3998    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3999    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4000    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4001    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4002 };
4003 
4004 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4005    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4006    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4007    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4008    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4009    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4010    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4011    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4012    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4013    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4014    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4015    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4016    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4017    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4018    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4019    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4020    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4021    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4022    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4023    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4024    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4025    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4026    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4027    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4028    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4029    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4030    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4031    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4032    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4033    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4034    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4035    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4036    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4037    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4038 };
4039 
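/*
 * GDS EDC workaround (RAS only): DMA over the whole GDS range with a
 * DMA_DATA packet on the first compute ring so that uninitialized GDS
 * contents do not later show up as ECC errors.
 */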
4040 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4041 {
4042 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4043 	int i, r;
4044 
4045 	/* only support when RAS is enabled */
4046 	/* only supported when RAS is enabled */
4047 		return 0;
4048 
4049 	r = amdgpu_ring_alloc(ring, 7);
4050 	if (r) {
4051 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4052 			ring->name, r);
4053 		return r;
4054 	}
4055 
4056 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4057 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4058 
4059 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4060 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4061 				PACKET3_DMA_DATA_DST_SEL(1) |
4062 				PACKET3_DMA_DATA_SRC_SEL(2) |
4063 				PACKET3_DMA_DATA_ENGINE(0)));
4064 	amdgpu_ring_write(ring, 0);
4065 	amdgpu_ring_write(ring, 0);
4066 	amdgpu_ring_write(ring, 0);
4067 	amdgpu_ring_write(ring, 0);
4068 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4069 				adev->gds.gds_size);
4070 
4071 	amdgpu_ring_commit(ring);
4072 
4073 	for (i = 0; i < adev->usec_timeout; i++) {
4074 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4075 			break;
4076 		udelay(1);
4077 	}
4078 
4079 	if (i >= adev->usec_timeout)
4080 		r = -ETIMEDOUT;
4081 
4082 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4083 
4084 	return r;
4085 }
4086 
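/*
 * GPR EDC workaround (RAS only): dispatch two small compute shaders from
 * an IB to initialize all VGPRs and SGPRs, then read back the SEC/DED
 * counter registers per SE/instance to clear them.
 */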
4087 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4088 {
4089 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4090 	struct amdgpu_ib ib;
4091 	struct dma_fence *f = NULL;
4092 	int r, i, j, k;
4093 	unsigned total_size, vgpr_offset, sgpr_offset;
4094 	u64 gpu_addr;
4095 
4096 	/* only support when RAS is enabled */
4097 	/* only supported when RAS is enabled */
4098 		return 0;
4099 
4100 	/* bail if the compute ring is not ready */
4101 	if (!ring->sched.ready)
4102 		return 0;
4103 
4104 	total_size =
4105 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4106 	total_size +=
4107 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4108 	total_size = ALIGN(total_size, 256);
4109 	vgpr_offset = total_size;
4110 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4111 	sgpr_offset = total_size;
4112 	total_size += sizeof(sgpr_init_compute_shader);
4113 
4114 	/* allocate an indirect buffer to put the commands in */
4115 	memset(&ib, 0, sizeof(ib));
4116 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4117 	if (r) {
4118 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4119 		return r;
4120 	}
4121 
4122 	/* load the compute shaders */
4123 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4124 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4125 
4126 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4127 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4128 
4129 	/* init the ib length to 0 */
4130 	ib.length_dw = 0;
4131 
4132 	/* VGPR */
4133 	/* write the register state for the compute dispatch */
4134 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4135 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4136 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4137 								- PACKET3_SET_SH_REG_START;
4138 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4139 	}
4140 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4141 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4142 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4143 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4144 							- PACKET3_SET_SH_REG_START;
4145 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4146 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4147 
4148 	/* write dispatch packet */
4149 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4150 	ib.ptr[ib.length_dw++] = 128; /* x */
4151 	ib.ptr[ib.length_dw++] = 1; /* y */
4152 	ib.ptr[ib.length_dw++] = 1; /* z */
4153 	ib.ptr[ib.length_dw++] =
4154 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4155 
4156 	/* write CS partial flush packet */
4157 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4158 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4159 
4160 	/* SGPR */
4161 	/* write the register state for the compute dispatch */
4162 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4163 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4164 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4165 								- PACKET3_SET_SH_REG_START;
4166 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4167 	}
4168 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4169 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4170 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4171 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4172 							- PACKET3_SET_SH_REG_START;
4173 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4174 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4175 
4176 	/* write dispatch packet */
4177 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4178 	ib.ptr[ib.length_dw++] = 128; /* x */
4179 	ib.ptr[ib.length_dw++] = 1; /* y */
4180 	ib.ptr[ib.length_dw++] = 1; /* z */
4181 	ib.ptr[ib.length_dw++] =
4182 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4183 
4184 	/* write CS partial flush packet */
4185 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4186 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4187 
4188 	/* schedule the ib on the ring */
4189 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4190 	if (r) {
4191 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4192 		goto fail;
4193 	}
4194 
4195 	/* wait for the GPU to finish processing the IB */
4196 	r = dma_fence_wait(f, false);
4197 	if (r) {
4198 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4199 		goto fail;
4200 	}
4201 
4202 	/* read back registers to clear the counters */
4203 	mutex_lock(&adev->grbm_idx_mutex);
4204 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4205 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4206 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4207 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4208 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4209 			}
4210 		}
4211 	}
4212 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4213 	mutex_unlock(&adev->grbm_idx_mutex);
4214 
4215 fail:
4216 	amdgpu_ib_free(adev, &ib, NULL);
4217 	dma_fence_put(f);
4218 
4219 	return r;
4220 }
4221 
4222 static int gfx_v9_0_early_init(void *handle)
4223 {
4224 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4225 
4226 	if (adev->asic_type == CHIP_ARCTURUS)
4227 		adev->gfx.num_gfx_rings = 0;
4228 	else
4229 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4230 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4231 	gfx_v9_0_set_ring_funcs(adev);
4232 	gfx_v9_0_set_irq_funcs(adev);
4233 	gfx_v9_0_set_gds_init(adev);
4234 	gfx_v9_0_set_rlc_funcs(adev);
4235 
4236 	return 0;
4237 }
4238 
4239 static int gfx_v9_0_ecc_late_init(void *handle)
4240 {
4241 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4242 	int r;
4243 
4244 	r = amdgpu_gfx_ras_late_init(adev);
4245 	if (r)
4246 		return r;
4247 
4248 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4249 	if (r)
4250 		return r;
4251 
4252 	/* requires IBs so do in late init after IB pool is initialized */
4253 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4254 	if (r)
4255 		return r;
4256 
4257 	return 0;
4258 }
4259 
4260 static int gfx_v9_0_late_init(void *handle)
4261 {
4262 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4263 	int r;
4264 
4265 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4266 	if (r)
4267 		return r;
4268 
4269 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4270 	if (r)
4271 		return r;
4272 
4273 	r = gfx_v9_0_ecc_late_init(handle);
4274 	if (r)
4275 		return r;
4276 
4277 	return 0;
4278 }
4279 
4280 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4281 {
4282 	uint32_t rlc_setting;
4283 
4284 	/* if RLC is not enabled, do nothing */
4285 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4286 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4287 		return false;
4288 
4289 	return true;
4290 }
4291 
4292 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4293 {
4294 	uint32_t data;
4295 	unsigned i;
4296 
4297 	data = RLC_SAFE_MODE__CMD_MASK;
4298 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4299 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4300 
4301 	/* wait for RLC_SAFE_MODE */
4302 	for (i = 0; i < adev->usec_timeout; i++) {
4303 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4304 			break;
4305 		udelay(1);
4306 	}
4307 }
4308 
4309 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4310 {
4311 	uint32_t data;
4312 
4313 	data = RLC_SAFE_MODE__CMD_MASK;
4314 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4315 }
4316 
4317 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4318 						bool enable)
4319 {
4320 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4321 
4322 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4323 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4324 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4325 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4326 	} else {
4327 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4328 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4329 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4330 	}
4331 
4332 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4333 }
4334 
4335 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4336 						bool enable)
4337 {
4338 	/* TODO: double check if we need to perform under safe mode */
4339 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4340 
4341 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4342 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4343 	else
4344 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4345 
4346 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4347 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4348 	else
4349 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4350 
4351 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4352 }
4353 
4354 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4355 						      bool enable)
4356 {
4357 	uint32_t data, def;
4358 
4359 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4360 
4361 	/* It is disabled by HW by default */
4362 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4363 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4364 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4365 
4366 		if (adev->asic_type != CHIP_VEGA12)
4367 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4368 
4369 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4370 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4371 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4372 
4373 		/* only for Vega10 & Raven1 */
4374 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4375 
4376 		if (def != data)
4377 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4378 
4379 		/* MGLS is a global flag to control all MGLS in GFX */
4380 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4381 			/* 2 - RLC memory Light sleep */
4382 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4383 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4384 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4385 				if (def != data)
4386 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4387 			}
4388 			/* 3 - CP memory Light sleep */
4389 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4390 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4391 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4392 				if (def != data)
4393 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4394 			}
4395 		}
4396 	} else {
4397 		/* 1 - MGCG_OVERRIDE */
4398 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4399 
4400 		if (adev->asic_type != CHIP_VEGA12)
4401 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4402 
4403 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4404 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4405 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4406 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4407 
4408 		if (def != data)
4409 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4410 
4411 		/* 2 - disable MGLS in RLC */
4412 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4413 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4414 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4415 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4416 		}
4417 
4418 		/* 3 - disable MGLS in CP */
4419 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4420 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4421 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4422 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4423 		}
4424 	}
4425 
4426 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4427 }
4428 
4429 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4430 					   bool enable)
4431 {
4432 	uint32_t data, def;
4433 
4434 	if (adev->asic_type == CHIP_ARCTURUS)
4435 		return;
4436 
4437 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4438 
4439 	/* Enable 3D CGCG/CGLS */
4440 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4441 		/* write cmd to clear cgcg/cgls ov */
4442 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4443 		/* unset CGCG override */
4444 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4445 		/* update CGCG and CGLS override bits */
4446 		if (def != data)
4447 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4448 
4449 		/* enable 3Dcgcg FSM(0x0000363f) */
4450 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4451 
4452 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4453 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4454 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4455 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4456 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4457 		if (def != data)
4458 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4459 
4460 		/* set IDLE_POLL_COUNT(0x00900100) */
4461 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4462 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4463 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4464 		if (def != data)
4465 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4466 	} else {
4467 		/* Disable CGCG/CGLS */
4468 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4469 		/* disable cgcg, cgls should be disabled */
4470 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4471 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4472 		/* disable cgcg and cgls in FSM */
4473 		if (def != data)
4474 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4475 	}
4476 
4477 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4478 }
4479 
4480 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4481 						      bool enable)
4482 {
4483 	uint32_t def, data;
4484 
4485 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4486 
4487 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4488 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4489 		/* unset CGCG override */
4490 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4491 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4492 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4493 		else
4494 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4495 		/* update CGCG and CGLS override bits */
4496 		if (def != data)
4497 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4498 
4499 		/* enable cgcg FSM(0x0000363F) */
4500 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4501 
4502 		if (adev->asic_type == CHIP_ARCTURUS)
4503 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4504 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4505 		else
4506 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4507 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4508 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4509 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4510 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4511 		if (def != data)
4512 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4513 
4514 		/* set IDLE_POLL_COUNT(0x00900100) */
4515 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4516 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4517 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4518 		if (def != data)
4519 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4520 	} else {
4521 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4522 		/* reset CGCG/CGLS bits */
4523 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4524 		/* disable cgcg and cgls in FSM */
4525 		if (def != data)
4526 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4527 	}
4528 
4529 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4530 }
4531 
4532 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4533 					    bool enable)
4534 {
4535 	if (enable) {
4536 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4537 		 * ===  MGCG + MGLS ===
4538 		 */
4539 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4540 		/* ===  CGCG /CGLS for GFX 3D Only === */
4541 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4542 		/* ===  CGCG + CGLS === */
4543 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4544 	} else {
4545 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4546 		 * ===  CGCG + CGLS ===
4547 		 */
4548 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4549 		/* ===  CGCG /CGLS for GFX 3D Only === */
4550 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4551 		/* ===  MGCG + MGLS === */
4552 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4553 	}
4554 	return 0;
4555 }
4556 
4557 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4558 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4559 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4560 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4561 	.init = gfx_v9_0_rlc_init,
4562 	.get_csb_size = gfx_v9_0_get_csb_size,
4563 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4564 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4565 	.resume = gfx_v9_0_rlc_resume,
4566 	.stop = gfx_v9_0_rlc_stop,
4567 	.reset = gfx_v9_0_rlc_reset,
4568 	.start = gfx_v9_0_rlc_start
4569 };
4570 
4571 static int gfx_v9_0_set_powergating_state(void *handle,
4572 					  enum amd_powergating_state state)
4573 {
4574 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4575 	bool enable = (state == AMD_PG_STATE_GATE);
4576 
4577 	switch (adev->asic_type) {
4578 	case CHIP_RAVEN:
4579 	case CHIP_RENOIR:
4580 		if (!enable) {
4581 			amdgpu_gfx_off_ctrl(adev, false);
4582 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4583 		}
4584 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4585 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4586 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4587 		} else {
4588 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4589 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4590 		}
4591 
4592 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4593 			gfx_v9_0_enable_cp_power_gating(adev, true);
4594 		else
4595 			gfx_v9_0_enable_cp_power_gating(adev, false);
4596 
4597 		/* update gfx cgpg state */
4598 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4599 
4600 		/* update mgcg state */
4601 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4602 
4603 		if (enable)
4604 			amdgpu_gfx_off_ctrl(adev, true);
4605 		break;
4606 	case CHIP_VEGA12:
4607 		if (!enable) {
4608 			amdgpu_gfx_off_ctrl(adev, false);
4609 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4610 		} else {
4611 			amdgpu_gfx_off_ctrl(adev, true);
4612 		}
4613 		break;
4614 	default:
4615 		break;
4616 	}
4617 
4618 	return 0;
4619 }
4620 
4621 static int gfx_v9_0_set_clockgating_state(void *handle,
4622 					  enum amd_clockgating_state state)
4623 {
4624 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4625 
4626 	if (amdgpu_sriov_vf(adev))
4627 		return 0;
4628 
4629 	switch (adev->asic_type) {
4630 	case CHIP_VEGA10:
4631 	case CHIP_VEGA12:
4632 	case CHIP_VEGA20:
4633 	case CHIP_RAVEN:
4634 	case CHIP_ARCTURUS:
4635 	case CHIP_RENOIR:
4636 		gfx_v9_0_update_gfx_clock_gating(adev,
4637 						 state == AMD_CG_STATE_GATE);
4638 		break;
4639 	default:
4640 		break;
4641 	}
4642 	return 0;
4643 }
4644 
4645 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4646 {
4647 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4648 	int data;
4649 
4650 	if (amdgpu_sriov_vf(adev))
4651 		*flags = 0;
4652 
4653 	/* AMD_CG_SUPPORT_GFX_MGCG */
4654 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4655 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4656 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4657 
4658 	/* AMD_CG_SUPPORT_GFX_CGCG */
4659 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4660 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4661 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4662 
4663 	/* AMD_CG_SUPPORT_GFX_CGLS */
4664 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4665 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4666 
4667 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4668 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4669 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4670 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4671 
4672 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4673 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4674 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4675 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4676 
4677 	if (adev->asic_type != CHIP_ARCTURUS) {
4678 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4679 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4680 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4681 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4682 
4683 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4684 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4685 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4686 	}
4687 }
4688 
4689 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4690 {
4691 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4692 }
4693 
4694 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4695 {
4696 	struct amdgpu_device *adev = ring->adev;
4697 	u64 wptr;
4698 
4699 	/* XXX check if swapping is necessary on BE */
4700 	if (ring->use_doorbell) {
4701 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4702 	} else {
4703 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4704 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4705 	}
4706 
4707 	return wptr;
4708 }
4709 
4710 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4711 {
4712 	struct amdgpu_device *adev = ring->adev;
4713 
4714 	if (ring->use_doorbell) {
4715 		/* XXX check if swapping is necessary on BE */
4716 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4717 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4718 	} else {
4719 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4720 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4721 	}
4722 }
4723 
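/*
 * Flush HDP by handshaking with NBIO: pick the ref_and_mask bit for this ring
 * (cp2/cp6 shifted by pipe for MEC1/MEC2 compute rings, cp0 via the PFP engine
 * for gfx), then emit a WAIT_REG_MEM against the NBIO hdp_flush req/done
 * registers.
 */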
4724 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4725 {
4726 	struct amdgpu_device *adev = ring->adev;
4727 	u32 ref_and_mask, reg_mem_engine;
4728 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4729 
4730 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4731 		switch (ring->me) {
4732 		case 1:
4733 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4734 			break;
4735 		case 2:
4736 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4737 			break;
4738 		default:
4739 			return;
4740 		}
4741 		reg_mem_engine = 0;
4742 	} else {
4743 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4744 		reg_mem_engine = 1; /* pfp */
4745 	}
4746 
4747 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4748 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4749 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4750 			      ref_and_mask, ref_and_mask, 0x20);
4751 }
4752 
4753 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4754 					struct amdgpu_job *job,
4755 					struct amdgpu_ib *ib,
4756 					uint32_t flags)
4757 {
4758 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4759 	u32 header, control = 0;
4760 
4761 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4762 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4763 	else
4764 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4765 
4766 	control |= ib->length_dw | (vmid << 24);
4767 
4768 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4769 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4770 
4771 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4772 			gfx_v9_0_ring_emit_de_meta(ring);
4773 	}
4774 
4775 	amdgpu_ring_write(ring, header);
4776 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4777 	amdgpu_ring_write(ring,
4778 #ifdef __BIG_ENDIAN
4779 		(2 << 0) |
4780 #endif
4781 		lower_32_bits(ib->gpu_addr));
4782 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4783 	amdgpu_ring_write(ring, control);
4784 }
4785 
4786 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4787 					  struct amdgpu_job *job,
4788 					  struct amdgpu_ib *ib,
4789 					  uint32_t flags)
4790 {
4791 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4792 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4793 
4794 	/* Currently, there is a high likelihood of a wave ID mismatch
4795 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4796 	 * different wave IDs than the GDS expects. This situation happens
4797 	 * randomly when at least 5 compute pipes use GDS ordered append.
4798 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4799 	 * Those are probably bugs somewhere else in the kernel driver.
4800 	 *
4801 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4802 	 * GDS to 0 for this ring (me/pipe).
4803 	 */
4804 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4805 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4806 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4807 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4808 	}
4809 
4810 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4811 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4812 	amdgpu_ring_write(ring,
4813 #ifdef __BIG_ENDIAN
4814 				(2 << 0) |
4815 #endif
4816 				lower_32_bits(ib->gpu_addr));
4817 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4818 	amdgpu_ring_write(ring, control);
4819 }
4820 
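/*
 * Emit a fence via RELEASE_MEM: flush/invalidate the TC caches (or TC
 * write-back only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), write the
 * 32/64-bit sequence value to "addr" and optionally raise an interrupt.
 */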
4821 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4822 				     u64 seq, unsigned flags)
4823 {
4824 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4825 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4826 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4827 
4828 	/* RELEASE_MEM - flush caches, send int */
4829 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4830 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4831 					       EOP_TC_NC_ACTION_EN) :
4832 					      (EOP_TCL1_ACTION_EN |
4833 					       EOP_TC_ACTION_EN |
4834 					       EOP_TC_WB_ACTION_EN |
4835 					       EOP_TC_MD_ACTION_EN)) |
4836 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4837 				 EVENT_INDEX(5)));
4838 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4839 
4840 	/*
4841 	 * the address should be Qword aligned if 64bit write, Dword
4842 	 * the address should be Qword aligned for a 64bit write, Dword
4843 	 * aligned if only the low 32 bits of data are sent (data high is discarded)
4844 	if (write64bit)
4845 		BUG_ON(addr & 0x7);
4846 	else
4847 		BUG_ON(addr & 0x3);
4848 	amdgpu_ring_write(ring, lower_32_bits(addr));
4849 	amdgpu_ring_write(ring, upper_32_bits(addr));
4850 	amdgpu_ring_write(ring, lower_32_bits(seq));
4851 	amdgpu_ring_write(ring, upper_32_bits(seq));
4852 	amdgpu_ring_write(ring, 0);
4853 }
4854 
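/*
 * Wait until the ring's last synced fence value appears at its fence address
 * before executing further packets; gfx rings wait on the PFP, compute rings
 * on the ME.
 */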
4855 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4856 {
4857 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4858 	uint32_t seq = ring->fence_drv.sync_seq;
4859 	uint64_t addr = ring->fence_drv.gpu_addr;
4860 
4861 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4862 			      lower_32_bits(addr), upper_32_bits(addr),
4863 			      seq, 0xffffffff, 4);
4864 }
4865 
4866 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4867 					unsigned vmid, uint64_t pd_addr)
4868 {
4869 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4870 
4871 	/* compute doesn't have PFP */
4872 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4873 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4874 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4875 		amdgpu_ring_write(ring, 0x0);
4876 	}
4877 }
4878 
4879 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4880 {
4881 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4882 }
4883 
4884 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4885 {
4886 	u64 wptr;
4887 
4888 	/* XXX check if swapping is necessary on BE */
4889 	if (ring->use_doorbell)
4890 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4891 	else
4892 		BUG();
4893 	return wptr;
4894 }
4895 
4896 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4897 					   bool acquire)
4898 {
4899 	struct amdgpu_device *adev = ring->adev;
4900 	int pipe_num, tmp, reg;
4901 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4902 
4903 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4904 
4905 	/* first me only has 2 entries, GFX and HP3D */
4906 	if (ring->me > 0)
4907 		pipe_num -= 2;
4908 
4909 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4910 	tmp = RREG32(reg);
4911 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4912 	WREG32(reg, tmp);
4913 }
4914 
4915 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4916 					    struct amdgpu_ring *ring,
4917 					    bool acquire)
4918 {
4919 	int i, pipe;
4920 	bool reserve;
4921 	struct amdgpu_ring *iring;
4922 
4923 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4924 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4925 	if (acquire)
4926 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4927 	else
4928 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4929 
4930 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4931 		/* Clear all reservations - everyone reacquires all resources */
4932 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4933 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4934 						       true);
4935 
4936 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4937 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4938 						       true);
4939 	} else {
4940 		/* Lower all pipes without a current reservation */
4941 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4942 			iring = &adev->gfx.gfx_ring[i];
4943 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4944 							   iring->me,
4945 							   iring->pipe,
4946 							   0);
4947 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4948 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4949 		}
4950 
4951 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4952 			iring = &adev->gfx.compute_ring[i];
4953 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4954 							   iring->me,
4955 							   iring->pipe,
4956 							   0);
4957 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4958 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4959 		}
4960 	}
4961 
4962 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4963 }
4964 
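/*
 * Program the HQD pipe/queue priority for this ring: select the queue via
 * SRBM, then raise (0x2/0xf) or restore (0x0/0x0) the priority registers.
 */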
4965 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4966 				      struct amdgpu_ring *ring,
4967 				      bool acquire)
4968 {
4969 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4970 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4971 
4972 	mutex_lock(&adev->srbm_mutex);
4973 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4974 
4975 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4976 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4977 
4978 	soc15_grbm_select(adev, 0, 0, 0, 0);
4979 	mutex_unlock(&adev->srbm_mutex);
4980 }
4981 
4982 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4983 					       enum drm_sched_priority priority)
4984 {
4985 	struct amdgpu_device *adev = ring->adev;
4986 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4987 
4988 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4989 		return;
4990 
4991 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4992 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4993 }
4994 
4995 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4996 {
4997 	struct amdgpu_device *adev = ring->adev;
4998 
4999 	/* XXX check if swapping is necessary on BE */
5000 	if (ring->use_doorbell) {
5001 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5002 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5003 	} else {
5004 		BUG(); /* only DOORBELL method supported on gfx9 now */
5005 	}
5006 }
5007 
5008 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5009 					 u64 seq, unsigned int flags)
5010 {
5011 	struct amdgpu_device *adev = ring->adev;
5012 
5013 	/* we only allocate 32bit for each seq wb address */
5014 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5015 
5016 	/* write fence seq to the "addr" */
5017 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5018 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5019 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5020 	amdgpu_ring_write(ring, lower_32_bits(addr));
5021 	amdgpu_ring_write(ring, upper_32_bits(addr));
5022 	amdgpu_ring_write(ring, lower_32_bits(seq));
5023 
5024 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5025 		/* set register to trigger INT */
5026 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5027 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5028 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5029 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5030 		amdgpu_ring_write(ring, 0);
5031 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5032 	}
5033 }
5034 
5035 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5036 {
5037 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5038 	amdgpu_ring_write(ring, 0);
5039 }
5040 
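/*
 * gfx_v9_0_ring_emit_ce_meta()/_de_meta() write the CE/DE metadata payloads
 * into the per-context save area (CSA) with WRITE_DATA packets; they are
 * emitted under SR-IOV from gfx_v9_ring_emit_cntxcntl() and for preemptible
 * IBs from gfx_v9_0_ring_emit_ib_gfx().
 */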
5041 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5042 {
5043 	struct v9_ce_ib_state ce_payload = {0};
5044 	uint64_t csa_addr;
5045 	int cnt;
5046 
5047 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5048 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5049 
5050 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5051 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5052 				 WRITE_DATA_DST_SEL(8) |
5053 				 WR_CONFIRM) |
5054 				 WRITE_DATA_CACHE_POLICY(0));
5055 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5056 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5057 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5058 }
5059 
5060 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5061 {
5062 	struct v9_de_ib_state de_payload = {0};
5063 	uint64_t csa_addr, gds_addr;
5064 	int cnt;
5065 
5066 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5067 	gds_addr = csa_addr + 4096;
5068 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5069 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5070 
5071 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5072 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5073 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5074 				 WRITE_DATA_DST_SEL(8) |
5075 				 WR_CONFIRM) |
5076 				 WRITE_DATA_CACHE_POLICY(0));
5077 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5078 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5079 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5080 }
5081 
5082 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5083 {
5084 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5085 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5086 }
5087 
5088 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5089 {
5090 	uint32_t dw2 = 0;
5091 
5092 	if (amdgpu_sriov_vf(ring->adev))
5093 		gfx_v9_0_ring_emit_ce_meta(ring);
5094 
5095 	gfx_v9_0_ring_emit_tmz(ring, true);
5096 
5097 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5098 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5099 		/* set load_global_config & load_global_uconfig */
5100 		dw2 |= 0x8001;
5101 		/* set load_cs_sh_regs */
5102 		dw2 |= 0x01000000;
5103 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5104 		dw2 |= 0x10002;
5105 
5106 		/* set load_ce_ram if preamble presented */
5107 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5108 			dw2 |= 0x10000000;
5109 	} else {
5110 		/* still load_ce_ram if this is the first time the preamble is presented,
5111 		 * even though no context switch happens.
5112 		 */
5113 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5114 			dw2 |= 0x10000000;
5115 	}
5116 
5117 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5118 	amdgpu_ring_write(ring, dw2);
5119 	amdgpu_ring_write(ring, 0);
5120 }
5121 
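/*
 * Conditional execution: init_cond_exec emits a COND_EXEC packet whose dword
 * count is a placeholder (0x55aa55aa) and returns its ring offset;
 * patch_cond_exec later rewrites that slot with the number of dwords to skip
 * when the condition address reads 0, accounting for ring-buffer wrap-around.
 */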
5122 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5123 {
5124 	unsigned ret;
5125 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5126 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5127 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5128 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5129 	ret = ring->wptr & ring->buf_mask;
5130 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5131 	return ret;
5132 }
5133 
5134 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5135 {
5136 	unsigned cur;
5137 	BUG_ON(offset > ring->buf_mask);
5138 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5139 
5140 	cur = (ring->wptr & ring->buf_mask) - 1;
5141 	if (likely(cur > offset))
5142 		ring->ring[offset] = cur - offset;
5143 	else
5144 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5145 }
5146 
5147 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5148 {
5149 	struct amdgpu_device *adev = ring->adev;
5150 
5151 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5152 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5153 				(5 << 8) |	/* dst: memory */
5154 				(1 << 20));	/* write confirm */
5155 	amdgpu_ring_write(ring, reg);
5156 	amdgpu_ring_write(ring, 0);
5157 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5158 				adev->virt.reg_val_offs * 4));
5159 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5160 				adev->virt.reg_val_offs * 4));
5161 }
5162 
5163 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5164 				    uint32_t val)
5165 {
5166 	uint32_t cmd = 0;
5167 
5168 	switch (ring->funcs->type) {
5169 	case AMDGPU_RING_TYPE_GFX:
5170 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5171 		break;
5172 	case AMDGPU_RING_TYPE_KIQ:
5173 		cmd = (1 << 16); /* no inc addr */
5174 		break;
5175 	default:
5176 		cmd = WR_CONFIRM;
5177 		break;
5178 	}
5179 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5180 	amdgpu_ring_write(ring, cmd);
5181 	amdgpu_ring_write(ring, reg);
5182 	amdgpu_ring_write(ring, 0);
5183 	amdgpu_ring_write(ring, val);
5184 }
5185 
5186 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5187 					uint32_t val, uint32_t mask)
5188 {
5189 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5190 }
5191 
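/*
 * Use the combined write-then-wait WAIT_REG_MEM form when the ME/MEC firmware
 * supports it; otherwise fall back to the helper that emits separate write and
 * wait packets.
 */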
5192 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5193 						  uint32_t reg0, uint32_t reg1,
5194 						  uint32_t ref, uint32_t mask)
5195 {
5196 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5197 	struct amdgpu_device *adev = ring->adev;
5198 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5199 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5200 
5201 	if (fw_version_ok)
5202 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5203 				      ref, mask, 0x20);
5204 	else
5205 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5206 							   ref, mask);
5207 }
5208 
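/*
 * Soft recovery: issue an SQ_CMD scoped to the given VMID (CHECK_VMID=1) so
 * the hung waves of that VMID can be stopped without a full GPU reset.
 */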
5209 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5210 {
5211 	struct amdgpu_device *adev = ring->adev;
5212 	uint32_t value = 0;
5213 
5214 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5215 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5216 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5217 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5218 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5219 }
5220 
5221 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5222 						 enum amdgpu_interrupt_state state)
5223 {
5224 	switch (state) {
5225 	case AMDGPU_IRQ_STATE_DISABLE:
5226 	case AMDGPU_IRQ_STATE_ENABLE:
5227 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5228 			       TIME_STAMP_INT_ENABLE,
5229 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5230 		break;
5231 	default:
5232 		break;
5233 	}
5234 }
5235 
5236 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5237 						     int me, int pipe,
5238 						     enum amdgpu_interrupt_state state)
5239 {
5240 	u32 mec_int_cntl, mec_int_cntl_reg;
5241 
5242 	/*
5243 	 * amdgpu controls only the first MEC. That's why this function only
5244 	 * handles the setting of interrupts for this specific MEC. All other
5245 	 * pipes' interrupts are set by amdkfd.
5246 	 */
5247 
5248 	if (me == 1) {
5249 		switch (pipe) {
5250 		case 0:
5251 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5252 			break;
5253 		case 1:
5254 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5255 			break;
5256 		case 2:
5257 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5258 			break;
5259 		case 3:
5260 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5261 			break;
5262 		default:
5263 			DRM_DEBUG("invalid pipe %d\n", pipe);
5264 			return;
5265 		}
5266 	} else {
5267 		DRM_DEBUG("invalid me %d\n", me);
5268 		return;
5269 	}
5270 
5271 	switch (state) {
5272 	case AMDGPU_IRQ_STATE_DISABLE:
5273 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5274 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5275 					     TIME_STAMP_INT_ENABLE, 0);
5276 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5277 		break;
5278 	case AMDGPU_IRQ_STATE_ENABLE:
5279 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5280 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5281 					     TIME_STAMP_INT_ENABLE, 1);
5282 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5283 		break;
5284 	default:
5285 		break;
5286 	}
5287 }
5288 
5289 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5290 					     struct amdgpu_irq_src *source,
5291 					     unsigned type,
5292 					     enum amdgpu_interrupt_state state)
5293 {
5294 	switch (state) {
5295 	case AMDGPU_IRQ_STATE_DISABLE:
5296 	case AMDGPU_IRQ_STATE_ENABLE:
5297 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5298 			       PRIV_REG_INT_ENABLE,
5299 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5300 		break;
5301 	default:
5302 		break;
5303 	}
5304 
5305 	return 0;
5306 }
5307 
5308 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5309 					      struct amdgpu_irq_src *source,
5310 					      unsigned type,
5311 					      enum amdgpu_interrupt_state state)
5312 {
5313 	switch (state) {
5314 	case AMDGPU_IRQ_STATE_DISABLE:
5315 	case AMDGPU_IRQ_STATE_ENABLE:
5316 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5317 			       PRIV_INSTR_INT_ENABLE,
5318 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5319 	default:
5320 		break;
5321 	}
5322 
5323 	return 0;
5324 }
5325 
5326 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5327 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5328 			CP_ECC_ERROR_INT_ENABLE, 1)
5329 
5330 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5331 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5332 			CP_ECC_ERROR_INT_ENABLE, 0)
5333 
5334 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5335 					      struct amdgpu_irq_src *source,
5336 					      unsigned type,
5337 					      enum amdgpu_interrupt_state state)
5338 {
5339 	switch (state) {
5340 	case AMDGPU_IRQ_STATE_DISABLE:
5341 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5342 				CP_ECC_ERROR_INT_ENABLE, 0);
5343 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5344 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5345 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5346 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5347 		break;
5348 
5349 	case AMDGPU_IRQ_STATE_ENABLE:
5350 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5351 				CP_ECC_ERROR_INT_ENABLE, 1);
5352 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5353 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5354 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5355 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5356 		break;
5357 	default:
5358 		break;
5359 	}
5360 
5361 	return 0;
5362 }
5363 
5364 
5365 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5366 					    struct amdgpu_irq_src *src,
5367 					    unsigned type,
5368 					    enum amdgpu_interrupt_state state)
5369 {
5370 	switch (type) {
5371 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5372 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5373 		break;
5374 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5375 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5376 		break;
5377 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5378 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5379 		break;
5380 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5381 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5382 		break;
5383 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5384 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5385 		break;
5386 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5387 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5388 		break;
5389 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5390 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5391 		break;
5392 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5393 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5394 		break;
5395 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5396 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5397 		break;
5398 	default:
5399 		break;
5400 	}
5401 	return 0;
5402 }
5403 
5404 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5405 			    struct amdgpu_irq_src *source,
5406 			    struct amdgpu_iv_entry *entry)
5407 {
5408 	int i;
5409 	u8 me_id, pipe_id, queue_id;
5410 	struct amdgpu_ring *ring;
5411 
5412 	DRM_DEBUG("IH: CP EOP\n");
5413 	me_id = (entry->ring_id & 0x0c) >> 2;
5414 	pipe_id = (entry->ring_id & 0x03) >> 0;
5415 	queue_id = (entry->ring_id & 0x70) >> 4;
5416 
5417 	switch (me_id) {
5418 	case 0:
5419 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5420 		break;
5421 	case 1:
5422 	case 2:
5423 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5424 			ring = &adev->gfx.compute_ring[i];
5425 			/* Per-queue interrupt is supported for MEC starting from VI.
5426 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5427 			 */
5428 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5429 				amdgpu_fence_process(ring);
5430 		}
5431 		break;
5432 	}
5433 	return 0;
5434 }
5435 
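/*
 * Decode me/pipe/queue from the IV ring_id and report a scheduler fault on
 * the matching gfx or compute ring so its jobs can be recovered.
 */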
5436 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5437 			   struct amdgpu_iv_entry *entry)
5438 {
5439 	u8 me_id, pipe_id, queue_id;
5440 	struct amdgpu_ring *ring;
5441 	int i;
5442 
5443 	me_id = (entry->ring_id & 0x0c) >> 2;
5444 	pipe_id = (entry->ring_id & 0x03) >> 0;
5445 	queue_id = (entry->ring_id & 0x70) >> 4;
5446 
5447 	switch (me_id) {
5448 	case 0:
5449 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5450 		break;
5451 	case 1:
5452 	case 2:
5453 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5454 			ring = &adev->gfx.compute_ring[i];
5455 			if (ring->me == me_id && ring->pipe == pipe_id &&
5456 			    ring->queue == queue_id)
5457 				drm_sched_fault(&ring->sched);
5458 		}
5459 		break;
5460 	}
5461 }
5462 
5463 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5464 				 struct amdgpu_irq_src *source,
5465 				 struct amdgpu_iv_entry *entry)
5466 {
5467 	DRM_ERROR("Illegal register access in command stream\n");
5468 	gfx_v9_0_fault(adev, entry);
5469 	return 0;
5470 }
5471 
5472 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5473 				  struct amdgpu_irq_src *source,
5474 				  struct amdgpu_iv_entry *entry)
5475 {
5476 	DRM_ERROR("Illegal instruction in command stream\n");
5477 	gfx_v9_0_fault(adev, entry);
5478 	return 0;
5479 }
5480 
5481 
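/*
 * Each entry maps a GFX sub-block name to its EDC counter register and the
 * SEC/DED count fields; entries listing only a single SED field have no
 * separate DED counter.
 */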
5482 static const struct ras_gfx_subblock_reg ras_subblock_regs[] = {
5483 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5484 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5485 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5486 	},
5487 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5488 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5489 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5490 	},
5491 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5492 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5493 	  0, 0
5494 	},
5495 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5496 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5497 	  0, 0
5498 	},
5499 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5500 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5501 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5502 	},
5503 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5504 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5505 	  0, 0
5506 	},
5507 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5508 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5509 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5510 	},
5511 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5512 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5513 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5514 	},
5515 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5516 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5517 	  0, 0
5518 	},
5519 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5520 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5521 	  0, 0
5522 	},
5523 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5524 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5525 	  0, 0
5526 	},
5527 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5528 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5529 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5530 	},
5531 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5532 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5533 	  0, 0
5534 	},
5535 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5536 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5537 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5538 	},
5539 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5540 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5541 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5542 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5543 	},
5544 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5545 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5546 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5547 	  0, 0
5548 	},
5549 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5550 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5551 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5552 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5553 	},
5554 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5555 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5556 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5557 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5558 	},
5559 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5560 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5561 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5562 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5563 	},
5564 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5565 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5566 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5567 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5568 	},
5569 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5570 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5571 	  0, 0
5572 	},
5573 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5574 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5575 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5576 	},
5577 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5578 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5579 	  0, 0
5580 	},
5581 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5582 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5583 	  0, 0
5584 	},
5585 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5586 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5587 	  0, 0
5588 	},
5589 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5590 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5591 	  0, 0
5592 	},
5593 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5594 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5595 	  0, 0
5596 	},
5597 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5598 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5599 	  0, 0
5600 	},
5601 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5602 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5603 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5604 	},
5605 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5606 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5607 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5608 	},
5609 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5610 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5611 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5612 	},
5613 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5614 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5615 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5616 	},
5617 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5618 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5619 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5620 	},
5621 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5622 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5623 	  0, 0
5624 	},
5625 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5626 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5627 	  0, 0
5628 	},
5629 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5630 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5631 	  0, 0
5632 	},
5633 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5634 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5635 	  0, 0
5636 	},
5637 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5638 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5639 	  0, 0
5640 	},
5641 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5642 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5643 	  0, 0
5644 	},
5645 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5646 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5647 	  0, 0
5648 	},
5649 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5650 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5651 	  0, 0
5652 	},
5653 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5654 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5655 	  0, 0
5656 	},
5657 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5658 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5659 	  0, 0
5660 	},
5661 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5662 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5663 	  0, 0
5664 	},
5665 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5666 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5667 	  0, 0
5668 	},
5669 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5670 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5671 	  0, 0
5672 	},
5673 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5674 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5675 	  0, 0
5676 	},
5677 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5678 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5679 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5680 	},
5681 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5682 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5683 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5684 	},
5685 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5686 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5687 	  0, 0
5688 	},
5689 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5690 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5691 	  0, 0
5692 	},
5693 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5694 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5695 	  0, 0
5696 	},
5697 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5698 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5699 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5700 	},
5701 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5702 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5703 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5704 	},
5705 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5706 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5707 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5708 	},
5709 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5710 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5711 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5712 	},
5713 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5714 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5715 	  0, 0
5716 	},
5717 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5718 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5719 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5720 	},
5721 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5722 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5723 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5724 	},
5725 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5726 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5727 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5728 	},
5729 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5730 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5731 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5732 	},
5733 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5734 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5735 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5736 	},
5737 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5738 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5739 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5740 	},
5741 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5742 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5743 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5744 	},
5745 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5746 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5747 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5748 	},
5749 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5750 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5751 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5752 	},
5753 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5754 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5755 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5756 	},
5757 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5758 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5759 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5760 	},
5761 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5762 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5763 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5764 	},
5765 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5766 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5767 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5768 	},
5769 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5770 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5771 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5772 	},
5773 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5774 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5775 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5776 	},
5777 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5778 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5779 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5780 	},
5781 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5782 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5783 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5784 	},
5785 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5786 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5787 	  0, 0
5788 	},
5789 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5790 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5791 	  0, 0
5792 	},
5793 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5794 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5795 	  0, 0
5796 	},
5797 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5798 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5799 	  0, 0
5800 	},
5801 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5802 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5803 	  0, 0
5804 	},
5805 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5806 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5807 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5808 	},
5809 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5810 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5811 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5812 	},
5813 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5814 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5815 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5816 	},
5817 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5818 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5819 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5820 	},
5821 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5822 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5823 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5824 	},
5825 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5826 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5827 	  0, 0
5828 	},
5829 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5830 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5831 	  0, 0
5832 	},
5833 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5834 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5835 	  0, 0
5836 	},
5837 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5838 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5839 	  0, 0
5840 	},
5841 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5842 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5843 	  0, 0
5844 	},
5845 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5846 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5847 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5848 	},
5849 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5850 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5851 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5852 	},
5853 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5854 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5855 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5856 	},
5857 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5858 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5859 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5860 	},
5861 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5862 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5863 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5864 	},
5865 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5866 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5867 	  0, 0
5868 	},
5869 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5870 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5871 	  0, 0
5872 	},
5873 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5874 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5875 	  0, 0
5876 	},
5877 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5878 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5879 	  0, 0
5880 	},
5881 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5882 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5883 	  0, 0
5884 	},
5885 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5886 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5887 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5888 	},
5889 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5890 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5891 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5892 	},
5893 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5894 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5895 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5896 	},
5897 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5898 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5899 	  0, 0
5900 	},
5901 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5902 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5903 	  0, 0
5904 	},
5905 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5906 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5907 	  0, 0
5908 	},
5909 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5910 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5911 	  0, 0
5912 	},
5913 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5914 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5915 	  0, 0
5916 	},
5917 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5918 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5919 	  0, 0
5920 	}
5921 };
5922 
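/*
 * Validate the requested GFX sub-block and error type against
 * ras_gfx_subblocks[] and forward the injection request to the RAS TA
 * through the PSP.  Only supported on Vega20.
 */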
5923 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5924 				     void *inject_if)
5925 {
5926 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5927 	int ret;
5928 	struct ta_ras_trigger_error_input block_info = { 0 };
5929 
5930 	if (adev->asic_type != CHIP_VEGA20)
5931 		return -EINVAL;
5932 
5933 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5934 		return -EINVAL;
5935 
5936 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5937 		return -EPERM;
5938 
5939 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5940 	      info->head.type)) {
5941 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5942 			ras_gfx_subblocks[info->head.sub_block_index].name,
5943 			info->head.type);
5944 		return -EPERM;
5945 	}
5946 
5947 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5948 	      info->head.type)) {
5949 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5950 			ras_gfx_subblocks[info->head.sub_block_index].name,
5951 			info->head.type);
5952 		return -EPERM;
5953 	}
5954 
5955 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5956 	block_info.sub_block_index =
5957 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5958 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5959 	block_info.address = info->address;
5960 	block_info.value = info->value;
5961 
5962 	mutex_lock(&adev->grbm_idx_mutex);
5963 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
5964 	mutex_unlock(&adev->grbm_idx_mutex);
5965 
5966 	return ret;
5967 }
5968 
5969 static const char *vml2_mems[] = {
5970 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
5971 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
5972 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
5973 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
5974 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
5975 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
5976 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
5977 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
5978 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
5979 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
5980 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
5981 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
5982 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
5983 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
5984 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
5985 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
5986 };
5987 
5988 static const char *vml2_walker_mems[] = {
5989 	"UTC_VML2_CACHE_PDE0_MEM0",
5990 	"UTC_VML2_CACHE_PDE0_MEM1",
5991 	"UTC_VML2_CACHE_PDE1_MEM0",
5992 	"UTC_VML2_CACHE_PDE1_MEM1",
5993 	"UTC_VML2_CACHE_PDE2_MEM0",
5994 	"UTC_VML2_CACHE_PDE2_MEM1",
5995 	"UTC_VML2_RDIF_LOG_FIFO",
5996 };
5997 
5998 static const char *atc_l2_cache_2m_mems[] = {
5999 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6000 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6001 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6002 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6003 };
6004 
6005 static const char *atc_l2_cache_4k_mems[] = {
6006 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6007 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6008 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6009 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6010 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6011 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6012 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6013 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6014 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6015 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6016 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6017 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6018 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6019 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6020 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6021 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6022 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6023 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6024 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6025 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6026 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6027 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6028 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6029 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6030 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6031 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6032 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6033 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6034 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6035 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6036 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6037 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6038 };
6039 
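/*
 * Walk the VML2, VML2 walker and ATC L2 (2M and 4K) EDC counters,
 * logging and accumulating any SEC/DED counts into err_data.
 * The *_INDEX registers are restored to 255 on exit.
 */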
6040 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6041 					 struct ras_err_data *err_data)
6042 {
6043 	uint32_t i, data;
6044 	uint32_t sec_count, ded_count;
6045 
6046 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6047 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6048 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6049 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6050 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6051 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6052 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6053 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6054 
6055 	for (i = 0; i < 16; i++) {
6056 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6057 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6058 
6059 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6060 		if (sec_count) {
6061 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6062 				 vml2_mems[i], sec_count);
6063 			err_data->ce_count += sec_count;
6064 		}
6065 
6066 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6067 		if (ded_count) {
6068 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6069 				 vml2_mems[i], ded_count);
6070 			err_data->ue_count += ded_count;
6071 		}
6072 	}
6073 
6074 	for (i = 0; i < 7; i++) {
6075 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6076 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6077 
6078 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6079 						SEC_COUNT);
6080 		if (sec_count) {
6081 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6082 				 vml2_walker_mems[i], sec_count);
6083 			err_data->ce_count += sec_count;
6084 		}
6085 
6086 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6087 						DED_COUNT);
6088 		if (ded_count) {
6089 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6090 				 vml2_walker_mems[i], ded_count);
6091 			err_data->ue_count += ded_count;
6092 		}
6093 	}
6094 
6095 	for (i = 0; i < 4; i++) {
6096 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6097 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6098 
6099 		sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT: bits 14:13 */
6100 		if (sec_count) {
6101 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6102 				 atc_l2_cache_2m_mems[i], sec_count);
6103 			err_data->ce_count += sec_count;
6104 		}
6105 	}
6106 
6107 	for (i = 0; i < 32; i++) {
6108 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6109 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6110 
6111 		sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT: bits 14:13 */
6112 		if (sec_count) {
6113 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6114 				 atc_l2_cache_4k_mems[i], sec_count);
6115 			err_data->ce_count += sec_count;
6116 		}
6117 
6118 		ded_count = (data & 0x00018000L) >> 0xf; /* DED_COUNT: bits 16:15 */
6119 		if (ded_count) {
6120 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6121 				 atc_l2_cache_4k_mems[i], ded_count);
6122 			err_data->ue_count += ded_count;
6123 		}
6124 	}
6125 
6126 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6127 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6128 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6129 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6130 
6131 	return 0;
6132 }
6133 
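/*
 * Match the given register against ras_subblock_regs[] and, for every
 * matching entry, extract the SEC/DED counts from the register value
 * and add them to *sec_count / *ded_count.
 */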
6134 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6135 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6136 	uint32_t *sec_count, uint32_t *ded_count)
6137 {
6138 	uint32_t i;
6139 	uint32_t sec_cnt, ded_cnt;
6140 
6141 	for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) {
6142 		if (ras_subblock_regs[i].reg_offset != reg->reg_offset ||
6143 			ras_subblock_regs[i].seg != reg->seg ||
6144 			ras_subblock_regs[i].inst != reg->inst)
6145 			continue;
6146 
6147 		sec_cnt = (value &
6148 				ras_subblock_regs[i].sec_count_mask) >>
6149 				ras_subblock_regs[i].sec_count_shift;
6150 		if (sec_cnt) {
6151 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6152 				ras_subblock_regs[i].name,
6153 				se_id, inst_id,
6154 				sec_cnt);
6155 			*sec_count += sec_cnt;
6156 		}
6157 
6158 		ded_cnt = (value &
6159 				ras_subblock_regs[i].ded_count_mask) >>
6160 				ras_subblock_regs[i].ded_count_shift;
6161 		if (ded_cnt) {
6162 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6163 				ras_subblock_regs[i].name,
6164 				se_id, inst_id,
6165 				ded_cnt);
6166 			*ded_count += ded_cnt;
6167 		}
6168 	}
6169 
6170 	return 0;
6171 }
6172 
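/*
 * Read every register in sec_ded_counter_registers[] across all SE and
 * instance indices, decode non-zero values with __get_ras_error_count()
 * and then append the UTC EDC status.  Vega20 only.
 */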
6173 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6174 					  void *ras_error_status)
6175 {
6176 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6177 	uint32_t sec_count = 0, ded_count = 0;
6178 	uint32_t i, j, k;
6179 	uint32_t reg_value;
6180 
6181 	if (adev->asic_type != CHIP_VEGA20)
6182 		return -EINVAL;
6183 
6184 	err_data->ue_count = 0;
6185 	err_data->ce_count = 0;
6186 
6187 	mutex_lock(&adev->grbm_idx_mutex);
6188 
6189 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6190 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6191 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6192 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6193 				reg_value =
6194 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6195 				if (reg_value)
6196 					__get_ras_error_count(&sec_ded_counter_registers[i],
6197 							j, k, reg_value,
6198 							&sec_count, &ded_count);
6199 			}
6200 		}
6201 	}
6202 
6203 	err_data->ce_count += sec_count;
6204 	err_data->ue_count += ded_count;
6205 
6206 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6207 	mutex_unlock(&adev->grbm_idx_mutex);
6208 
6209 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6210 
6211 	return 0;
6212 }
6213 
6214 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6215 	.name = "gfx_v9_0",
6216 	.early_init = gfx_v9_0_early_init,
6217 	.late_init = gfx_v9_0_late_init,
6218 	.sw_init = gfx_v9_0_sw_init,
6219 	.sw_fini = gfx_v9_0_sw_fini,
6220 	.hw_init = gfx_v9_0_hw_init,
6221 	.hw_fini = gfx_v9_0_hw_fini,
6222 	.suspend = gfx_v9_0_suspend,
6223 	.resume = gfx_v9_0_resume,
6224 	.is_idle = gfx_v9_0_is_idle,
6225 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6226 	.soft_reset = gfx_v9_0_soft_reset,
6227 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6228 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6229 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6230 };
6231 
6232 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6233 	.type = AMDGPU_RING_TYPE_GFX,
6234 	.align_mask = 0xff,
6235 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6236 	.support_64bit_ptrs = true,
6237 	.vmhub = AMDGPU_GFXHUB_0,
6238 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6239 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6240 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6241 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6242 		5 +  /* COND_EXEC */
6243 		7 +  /* PIPELINE_SYNC */
6244 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6245 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6246 		2 + /* VM_FLUSH */
6247 		8 +  /* FENCE for VM_FLUSH */
6248 		20 + /* GDS switch */
6249 		4 + /* double SWITCH_BUFFER,
6250 		       the first COND_EXEC jumps to the place just
6251 		       prior to this double SWITCH_BUFFER */
6252 		5 + /* COND_EXEC */
6253 		7 +	 /*	HDP_flush */
6254 		4 +	 /*	VGT_flush */
6255 		14 + /*	CE_META */
6256 		31 + /*	DE_META */
6257 		3 + /* CNTX_CTRL */
6258 		5 + /* HDP_INVL */
6259 		8 + 8 + /* FENCE x2 */
6260 		2, /* SWITCH_BUFFER */
6261 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6262 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6263 	.emit_fence = gfx_v9_0_ring_emit_fence,
6264 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6265 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6266 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6267 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6268 	.test_ring = gfx_v9_0_ring_test_ring,
6269 	.test_ib = gfx_v9_0_ring_test_ib,
6270 	.insert_nop = amdgpu_ring_insert_nop,
6271 	.pad_ib = amdgpu_ring_generic_pad_ib,
6272 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6273 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6274 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6275 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6276 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6277 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6278 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6279 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6280 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6281 };
6282 
6283 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6284 	.type = AMDGPU_RING_TYPE_COMPUTE,
6285 	.align_mask = 0xff,
6286 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6287 	.support_64bit_ptrs = true,
6288 	.vmhub = AMDGPU_GFXHUB_0,
6289 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6290 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6291 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6292 	.emit_frame_size =
6293 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6294 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6295 		5 + /* hdp invalidate */
6296 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6297 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6298 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6299 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6300 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6301 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6302 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6303 	.emit_fence = gfx_v9_0_ring_emit_fence,
6304 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6305 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6306 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6307 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6308 	.test_ring = gfx_v9_0_ring_test_ring,
6309 	.test_ib = gfx_v9_0_ring_test_ib,
6310 	.insert_nop = amdgpu_ring_insert_nop,
6311 	.pad_ib = amdgpu_ring_generic_pad_ib,
6312 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6313 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6314 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6315 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6316 };
6317 
6318 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6319 	.type = AMDGPU_RING_TYPE_KIQ,
6320 	.align_mask = 0xff,
6321 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6322 	.support_64bit_ptrs = true,
6323 	.vmhub = AMDGPU_GFXHUB_0,
6324 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6325 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6326 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6327 	.emit_frame_size =
6328 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6329 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6330 		5 + /* hdp invalidate */
6331 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6332 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6333 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6334 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6335 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6336 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6337 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6338 	.test_ring = gfx_v9_0_ring_test_ring,
6339 	.insert_nop = amdgpu_ring_insert_nop,
6340 	.pad_ib = amdgpu_ring_generic_pad_ib,
6341 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6342 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6343 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6344 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6345 };
6346 
6347 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6348 {
6349 	int i;
6350 
6351 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6352 
6353 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6354 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6355 
6356 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6357 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6358 }
6359 
6360 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6361 	.set = gfx_v9_0_set_eop_interrupt_state,
6362 	.process = gfx_v9_0_eop_irq,
6363 };
6364 
6365 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6366 	.set = gfx_v9_0_set_priv_reg_fault_state,
6367 	.process = gfx_v9_0_priv_reg_irq,
6368 };
6369 
6370 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6371 	.set = gfx_v9_0_set_priv_inst_fault_state,
6372 	.process = gfx_v9_0_priv_inst_irq,
6373 };
6374 
6375 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6376 	.set = gfx_v9_0_set_cp_ecc_error_state,
6377 	.process = amdgpu_gfx_cp_ecc_error_irq,
6378 };
6379 
6380 
6381 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6382 {
6383 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6384 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6385 
6386 	adev->gfx.priv_reg_irq.num_types = 1;
6387 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6388 
6389 	adev->gfx.priv_inst_irq.num_types = 1;
6390 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6391 
6392 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6393 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6394 }
6395 
6396 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6397 {
6398 	switch (adev->asic_type) {
6399 	case CHIP_VEGA10:
6400 	case CHIP_VEGA12:
6401 	case CHIP_VEGA20:
6402 	case CHIP_RAVEN:
6403 	case CHIP_ARCTURUS:
6404 	case CHIP_RENOIR:
6405 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6406 		break;
6407 	default:
6408 		break;
6409 	}
6410 }
6411 
6412 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6413 {
6414 	/* init asic gds info */
6415 	switch (adev->asic_type) {
6416 	case CHIP_VEGA10:
6417 	case CHIP_VEGA12:
6418 	case CHIP_VEGA20:
6419 		adev->gds.gds_size = 0x10000;
6420 		break;
6421 	case CHIP_RAVEN:
6422 	case CHIP_ARCTURUS:
6423 		adev->gds.gds_size = 0x1000;
6424 		break;
6425 	default:
6426 		adev->gds.gds_size = 0x10000;
6427 		break;
6428 	}
6429 
6430 	switch (adev->asic_type) {
6431 	case CHIP_VEGA10:
6432 	case CHIP_VEGA20:
6433 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6434 		break;
6435 	case CHIP_VEGA12:
6436 		adev->gds.gds_compute_max_wave_id = 0x27f;
6437 		break;
6438 	case CHIP_RAVEN:
6439 		if (adev->rev_id >= 0x8)
6440 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6441 		else
6442 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6443 		break;
6444 	case CHIP_ARCTURUS:
6445 		adev->gds.gds_compute_max_wave_id = 0xfff;
6446 		break;
6447 	default:
6448 		/* this really depends on the chip */
6449 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6450 		break;
6451 	}
6452 
6453 	adev->gds.gws_size = 64;
6454 	adev->gds.oa_size = 16;
6455 }
6456 
6457 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6458 						 u32 bitmap)
6459 {
6460 	u32 data;
6461 
6462 	if (!bitmap)
6463 		return;
6464 
6465 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6466 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6467 
6468 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6469 }
6470 
6471 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6472 {
6473 	u32 data, mask;
6474 
6475 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6476 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6477 
6478 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6479 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6480 
6481 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6482 
6483 	return (~data) & mask;
6484 }
6485 
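/*
 * Build the per-SE/SH active and always-on CU bitmaps, honoring any
 * user-disabled CUs, and report the total number of active CUs.
 */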
6486 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6487 				 struct amdgpu_cu_info *cu_info)
6488 {
6489 	int i, j, k, counter, active_cu_number = 0;
6490 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6491 	unsigned disable_masks[4 * 4];
6492 
6493 	if (!adev || !cu_info)
6494 		return -EINVAL;
6495 
6496 	/*
6497 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6498 	 */
6499 	if (adev->gfx.config.max_shader_engines *
6500 		adev->gfx.config.max_sh_per_se > 16)
6501 		return -EINVAL;
6502 
6503 	amdgpu_gfx_parse_disable_cu(disable_masks,
6504 				    adev->gfx.config.max_shader_engines,
6505 				    adev->gfx.config.max_sh_per_se);
6506 
6507 	mutex_lock(&adev->grbm_idx_mutex);
6508 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6509 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6510 			mask = 1;
6511 			ao_bitmap = 0;
6512 			counter = 0;
6513 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6514 			gfx_v9_0_set_user_cu_inactive_bitmap(
6515 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6516 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6517 
6518 			/*
6519 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6520 			 * is a 4x4 array, which suits Vega ASICs with their
6521 			 * 4*2 SE/SH layout.
6522 			 * But for Arcturus the SE/SH layout changes to 8*1.
6523 			 * To minimize the impact, we make it compatible
6524 			 * with the current bitmap array as below:
6525 			 *    SE4,SH0 --> bitmap[0][1]
6526 			 *    SE5,SH0 --> bitmap[1][1]
6527 			 *    SE6,SH0 --> bitmap[2][1]
6528 			 *    SE7,SH0 --> bitmap[3][1]
6529 			 */
6530 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6531 
6532 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6533 				if (bitmap & mask) {
6534 					if (counter < adev->gfx.config.max_cu_per_sh)
6535 						ao_bitmap |= mask;
6536 					counter++;
6537 				}
6538 				mask <<= 1;
6539 			}
6540 			active_cu_number += counter;
6541 			if (i < 2 && j < 2)
6542 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6543 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6544 		}
6545 	}
6546 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6547 	mutex_unlock(&adev->grbm_idx_mutex);
6548 
6549 	cu_info->number = active_cu_number;
6550 	cu_info->ao_cu_mask = ao_cu_mask;
6551 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6552 
6553 	return 0;
6554 }
6555 
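/*
 * Exported IP block descriptor; the SoC setup code (soc15.c) is expected
 * to register it with amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block).
 */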
6556 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6557 {
6558 	.type = AMD_IP_BLOCK_TYPE_GFX,
6559 	.major = 9,
6560 	.minor = 0,
6561 	.rev = 0,
6562 	.funcs = &gfx_v9_0_ip_funcs,
6563 };
6564