xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 2b77dcc5)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #include "sdma0/sdma0_4_0_offset.h"
52 #include "sdma1/sdma1_4_0_offset.h"
53 #define GFX9_NUM_GFX_RINGS     1
54 #define GFX9_MEC_HPD_SIZE 4096
55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
56 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
57 
58 #define mmPWR_MISC_CNTL_STATUS					0x0183
59 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
62 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
63 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
64 
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
122 
123 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
133 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
135 
136 enum ta_ras_gfx_subblock {
137 	/*CPC*/
138 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
139 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
140 	TA_RAS_BLOCK__GFX_CPC_UCODE,
141 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
143 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
144 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
145 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
146 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
147 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
148 	/* CPF*/
149 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
150 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
151 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
152 	TA_RAS_BLOCK__GFX_CPF_TAG,
153 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
154 	/* CPG*/
155 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
157 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
158 	TA_RAS_BLOCK__GFX_CPG_TAG,
159 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
160 	/* GDS*/
161 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
162 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
163 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
166 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
167 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
168 	/* SPI*/
169 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
170 	/* SQ*/
171 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
172 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
173 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
174 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
175 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
176 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
177 	/* SQC (3 ranges)*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	/* SQC range 0*/
180 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
181 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
182 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
188 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
190 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
191 	/* SQC range 1*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
197 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
204 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
205 	/* SQC range 2*/
206 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
208 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
218 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
220 	/* TA*/
221 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
222 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
223 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
225 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
226 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
227 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
228 	/* TCA*/
229 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
231 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
232 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
233 	/* TCC (5 sub-ranges)*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	/* TCC range 0*/
236 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
241 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
242 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
243 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
244 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
245 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
246 	/* TCC range 1*/
247 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
248 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
249 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
251 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
252 	/* TCC range 2*/
253 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
254 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
255 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
256 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
257 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
258 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
259 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
261 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
263 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
264 	/* TCC range 3*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
266 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
267 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
269 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
270 	/* TCC range 4*/
271 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
273 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
274 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
276 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
277 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
278 	/* TCI*/
279 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
280 	/* TCP*/
281 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
282 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
283 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
284 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
285 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
286 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
287 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
288 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
289 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
290 	/* TD*/
291 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
292 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
293 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
294 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
295 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
296 	/* EA (3 sub-ranges)*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	/* EA range 0*/
299 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
300 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
301 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
302 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
303 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
304 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
306 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
307 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
308 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
309 	/* EA range 1*/
310 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
311 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
313 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
315 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
316 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
318 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
319 	/* EA range 2*/
320 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
323 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
324 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
326 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
327 	/* UTC VM L2 bank*/
328 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
329 	/* UTC VM walker*/
330 	TA_RAS_BLOCK__UTC_VML2_WALKER,
331 	/* UTC ATC L2 2MB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
333 	/* UTC ATC L2 4KB cache*/
334 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
335 	TA_RAS_BLOCK__GFX_MAX
336 };
337 
338 struct ras_gfx_subblock {
339 	unsigned char *name;
340 	int ta_subblock;
341 	int hw_supported_error_type;
342 	int sw_supported_error_type;
343 };
344 
345 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
346 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
347 		#subblock,                                                     \
348 		TA_RAS_BLOCK__##subblock,                                      \
349 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
350 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
351 	}
352 
353 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
354 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
373 			     0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
382 			     0, 0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
384 			     0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
386 			     0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
388 			     0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
390 			     0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
392 			     0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
394 			     1),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
396 			     0, 0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
406 			     0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
408 			     0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
414 			     0, 0, 0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
426 			     0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
442 			     1),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
446 			     0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
459 			     0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
462 			     0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
464 			     0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
466 			     0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
501 };
502 
503 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
504 {
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
525 };
526 
527 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
528 {
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
547 };
548 
549 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
550 {
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
562 };
563 
564 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
565 {
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
590 };
591 
592 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
593 {
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
601 };
602 
603 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
604 {
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 };
625 
626 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
627 {
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 };
668 
669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
670 {
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
684 };
685 
686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
687 {
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
698 };
699 
700 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
701 {
702 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
709 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 };
711 
712 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
713 {
714 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
721 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 };
723 
724 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
725 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
726 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
727 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
728 
729 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
731 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
732 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
733 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
734                                  struct amdgpu_cu_info *cu_info);
735 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
736 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
737 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
738 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
739 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
740 					  void *ras_error_status);
741 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
742 				     void *inject_if);
743 
744 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
745 {
746 	switch (adev->asic_type) {
747 	case CHIP_VEGA10:
748 		soc15_program_register_sequence(adev,
749 						golden_settings_gc_9_0,
750 						ARRAY_SIZE(golden_settings_gc_9_0));
751 		soc15_program_register_sequence(adev,
752 						golden_settings_gc_9_0_vg10,
753 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
754 		break;
755 	case CHIP_VEGA12:
756 		soc15_program_register_sequence(adev,
757 						golden_settings_gc_9_2_1,
758 						ARRAY_SIZE(golden_settings_gc_9_2_1));
759 		soc15_program_register_sequence(adev,
760 						golden_settings_gc_9_2_1_vg12,
761 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
762 		break;
763 	case CHIP_VEGA20:
764 		soc15_program_register_sequence(adev,
765 						golden_settings_gc_9_0,
766 						ARRAY_SIZE(golden_settings_gc_9_0));
767 		soc15_program_register_sequence(adev,
768 						golden_settings_gc_9_0_vg20,
769 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
770 		break;
771 	case CHIP_ARCTURUS:
772 		soc15_program_register_sequence(adev,
773 						golden_settings_gc_9_4_1_arct,
774 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
775 		break;
776 	case CHIP_RAVEN:
777 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
778 						ARRAY_SIZE(golden_settings_gc_9_1));
779 		if (adev->rev_id >= 8)
780 			soc15_program_register_sequence(adev,
781 							golden_settings_gc_9_1_rv2,
782 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
783 		else
784 			soc15_program_register_sequence(adev,
785 							golden_settings_gc_9_1_rv1,
786 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
787 		break;
788 	 case CHIP_RENOIR:
789 		soc15_program_register_sequence(adev,
790 						golden_settings_gc_9_1_rn,
791 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
792 		return; /* for renoir, don't need common goldensetting */
793 	default:
794 		break;
795 	}
796 
797 	if (adev->asic_type != CHIP_ARCTURUS)
798 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
799 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
800 }
801 
802 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
803 {
804 	adev->gfx.scratch.num_reg = 8;
805 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
806 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
807 }
808 
809 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
810 				       bool wc, uint32_t reg, uint32_t val)
811 {
812 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
813 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
814 				WRITE_DATA_DST_SEL(0) |
815 				(wc ? WR_CONFIRM : 0));
816 	amdgpu_ring_write(ring, reg);
817 	amdgpu_ring_write(ring, 0);
818 	amdgpu_ring_write(ring, val);
819 }
820 
821 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
822 				  int mem_space, int opt, uint32_t addr0,
823 				  uint32_t addr1, uint32_t ref, uint32_t mask,
824 				  uint32_t inv)
825 {
826 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
827 	amdgpu_ring_write(ring,
828 				 /* memory (1) or register (0) */
829 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
830 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
831 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
832 				 WAIT_REG_MEM_ENGINE(eng_sel)));
833 
834 	if (mem_space)
835 		BUG_ON(addr0 & 0x3); /* Dword align */
836 	amdgpu_ring_write(ring, addr0);
837 	amdgpu_ring_write(ring, addr1);
838 	amdgpu_ring_write(ring, ref);
839 	amdgpu_ring_write(ring, mask);
840 	amdgpu_ring_write(ring, inv); /* poll interval */
841 }
842 
843 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
844 {
845 	struct amdgpu_device *adev = ring->adev;
846 	uint32_t scratch;
847 	uint32_t tmp = 0;
848 	unsigned i;
849 	int r;
850 
851 	r = amdgpu_gfx_scratch_get(adev, &scratch);
852 	if (r)
853 		return r;
854 
855 	WREG32(scratch, 0xCAFEDEAD);
856 	r = amdgpu_ring_alloc(ring, 3);
857 	if (r)
858 		goto error_free_scratch;
859 
860 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
861 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
862 	amdgpu_ring_write(ring, 0xDEADBEEF);
863 	amdgpu_ring_commit(ring);
864 
865 	for (i = 0; i < adev->usec_timeout; i++) {
866 		tmp = RREG32(scratch);
867 		if (tmp == 0xDEADBEEF)
868 			break;
869 		udelay(1);
870 	}
871 
872 	if (i >= adev->usec_timeout)
873 		r = -ETIMEDOUT;
874 
875 error_free_scratch:
876 	amdgpu_gfx_scratch_free(adev, scratch);
877 	return r;
878 }
879 
880 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
881 {
882 	struct amdgpu_device *adev = ring->adev;
883 	struct amdgpu_ib ib;
884 	struct dma_fence *f = NULL;
885 
886 	unsigned index;
887 	uint64_t gpu_addr;
888 	uint32_t tmp;
889 	long r;
890 
891 	r = amdgpu_device_wb_get(adev, &index);
892 	if (r)
893 		return r;
894 
895 	gpu_addr = adev->wb.gpu_addr + (index * 4);
896 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
897 	memset(&ib, 0, sizeof(ib));
898 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
899 	if (r)
900 		goto err1;
901 
902 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
903 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
904 	ib.ptr[2] = lower_32_bits(gpu_addr);
905 	ib.ptr[3] = upper_32_bits(gpu_addr);
906 	ib.ptr[4] = 0xDEADBEEF;
907 	ib.length_dw = 5;
908 
909 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
910 	if (r)
911 		goto err2;
912 
913 	r = dma_fence_wait_timeout(f, false, timeout);
914 	if (r == 0) {
915 		r = -ETIMEDOUT;
916 		goto err2;
917 	} else if (r < 0) {
918 		goto err2;
919 	}
920 
921 	tmp = adev->wb.wb[index];
922 	if (tmp == 0xDEADBEEF)
923 		r = 0;
924 	else
925 		r = -EINVAL;
926 
927 err2:
928 	amdgpu_ib_free(adev, &ib, NULL);
929 	dma_fence_put(f);
930 err1:
931 	amdgpu_device_wb_free(adev, index);
932 	return r;
933 }
934 
935 
936 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
937 {
938 	release_firmware(adev->gfx.pfp_fw);
939 	adev->gfx.pfp_fw = NULL;
940 	release_firmware(adev->gfx.me_fw);
941 	adev->gfx.me_fw = NULL;
942 	release_firmware(adev->gfx.ce_fw);
943 	adev->gfx.ce_fw = NULL;
944 	release_firmware(adev->gfx.rlc_fw);
945 	adev->gfx.rlc_fw = NULL;
946 	release_firmware(adev->gfx.mec_fw);
947 	adev->gfx.mec_fw = NULL;
948 	release_firmware(adev->gfx.mec2_fw);
949 	adev->gfx.mec2_fw = NULL;
950 
951 	kfree(adev->gfx.rlc.register_list_format);
952 }
953 
954 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
955 {
956 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
957 
958 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
959 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
960 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
961 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
962 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
963 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
964 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
965 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
966 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
967 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
968 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
969 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
970 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
971 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
972 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
973 }
974 
975 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
976 {
977 	adev->gfx.me_fw_write_wait = false;
978 	adev->gfx.mec_fw_write_wait = false;
979 
980 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
981 	    (adev->gfx.mec_feature_version < 46) ||
982 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
983 	    (adev->gfx.pfp_feature_version < 46))
984 		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
985 			      GRBM requires 1-cycle delay in cp firmware\n");
986 
987 	switch (adev->asic_type) {
988 	case CHIP_VEGA10:
989 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
990 		    (adev->gfx.me_feature_version >= 42) &&
991 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
992 		    (adev->gfx.pfp_feature_version >= 42))
993 			adev->gfx.me_fw_write_wait = true;
994 
995 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
996 		    (adev->gfx.mec_feature_version >= 42))
997 			adev->gfx.mec_fw_write_wait = true;
998 		break;
999 	case CHIP_VEGA12:
1000 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1001 		    (adev->gfx.me_feature_version >= 44) &&
1002 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1003 		    (adev->gfx.pfp_feature_version >= 44))
1004 			adev->gfx.me_fw_write_wait = true;
1005 
1006 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1007 		    (adev->gfx.mec_feature_version >= 44))
1008 			adev->gfx.mec_fw_write_wait = true;
1009 		break;
1010 	case CHIP_VEGA20:
1011 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1012 		    (adev->gfx.me_feature_version >= 44) &&
1013 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1014 		    (adev->gfx.pfp_feature_version >= 44))
1015 			adev->gfx.me_fw_write_wait = true;
1016 
1017 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1018 		    (adev->gfx.mec_feature_version >= 44))
1019 			adev->gfx.mec_fw_write_wait = true;
1020 		break;
1021 	case CHIP_RAVEN:
1022 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1023 		    (adev->gfx.me_feature_version >= 42) &&
1024 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1025 		    (adev->gfx.pfp_feature_version >= 42))
1026 			adev->gfx.me_fw_write_wait = true;
1027 
1028 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1029 		    (adev->gfx.mec_feature_version >= 42))
1030 			adev->gfx.mec_fw_write_wait = true;
1031 		break;
1032 	default:
1033 		break;
1034 	}
1035 }
1036 
1037 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1038 {
1039 	switch (adev->asic_type) {
1040 	case CHIP_VEGA10:
1041 	case CHIP_VEGA12:
1042 	case CHIP_VEGA20:
1043 		break;
1044 	case CHIP_RAVEN:
1045 		/* Disable GFXOFF on original raven.  There are combinations
1046 		 * of sbios and platforms that are not stable.
1047 		 */
1048 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
1049 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1050 		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1051 			 &&((adev->gfx.rlc_fw_version != 106 &&
1052 			     adev->gfx.rlc_fw_version < 531) ||
1053 			    (adev->gfx.rlc_fw_version == 53815) ||
1054 			    (adev->gfx.rlc_feature_version < 1) ||
1055 			    !adev->gfx.rlc.is_rlc_v2_1))
1056 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1057 
1058 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1059 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1060 				AMD_PG_SUPPORT_CP |
1061 				AMD_PG_SUPPORT_RLC_SMU_HS;
1062 		break;
1063 	case CHIP_RENOIR:
1064 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1065 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1066 				AMD_PG_SUPPORT_CP |
1067 				AMD_PG_SUPPORT_RLC_SMU_HS;
1068 		break;
1069 	default:
1070 		break;
1071 	}
1072 }
1073 
1074 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1075 					  const char *chip_name)
1076 {
1077 	char fw_name[30];
1078 	int err;
1079 	struct amdgpu_firmware_info *info = NULL;
1080 	const struct common_firmware_header *header = NULL;
1081 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1082 
1083 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1084 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1085 	if (err)
1086 		goto out;
1087 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1088 	if (err)
1089 		goto out;
1090 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1091 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1092 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1093 
1094 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1095 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1096 	if (err)
1097 		goto out;
1098 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1099 	if (err)
1100 		goto out;
1101 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1102 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1103 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1104 
1105 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1106 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1107 	if (err)
1108 		goto out;
1109 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1110 	if (err)
1111 		goto out;
1112 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1113 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1114 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1115 
1116 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1117 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1118 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1119 		info->fw = adev->gfx.pfp_fw;
1120 		header = (const struct common_firmware_header *)info->fw->data;
1121 		adev->firmware.fw_size +=
1122 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1123 
1124 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1125 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1126 		info->fw = adev->gfx.me_fw;
1127 		header = (const struct common_firmware_header *)info->fw->data;
1128 		adev->firmware.fw_size +=
1129 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1130 
1131 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1132 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1133 		info->fw = adev->gfx.ce_fw;
1134 		header = (const struct common_firmware_header *)info->fw->data;
1135 		adev->firmware.fw_size +=
1136 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1137 	}
1138 
1139 out:
1140 	if (err) {
1141 		dev_err(adev->dev,
1142 			"gfx9: Failed to load firmware \"%s\"\n",
1143 			fw_name);
1144 		release_firmware(adev->gfx.pfp_fw);
1145 		adev->gfx.pfp_fw = NULL;
1146 		release_firmware(adev->gfx.me_fw);
1147 		adev->gfx.me_fw = NULL;
1148 		release_firmware(adev->gfx.ce_fw);
1149 		adev->gfx.ce_fw = NULL;
1150 	}
1151 	return err;
1152 }
1153 
1154 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1155 					  const char *chip_name)
1156 {
1157 	char fw_name[30];
1158 	int err;
1159 	struct amdgpu_firmware_info *info = NULL;
1160 	const struct common_firmware_header *header = NULL;
1161 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1162 	unsigned int *tmp = NULL;
1163 	unsigned int i = 0;
1164 	uint16_t version_major;
1165 	uint16_t version_minor;
1166 	uint32_t smu_version;
1167 
1168 	/*
1169 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1170 	 * instead of picasso_rlc.bin.
1171 	 * Judgment method:
1172 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1173 	 *          or revision >= 0xD8 && revision <= 0xDF
1174 	 * otherwise is PCO FP5
1175 	 */
1176 	if (!strcmp(chip_name, "picasso") &&
1177 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1178 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1179 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1180 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1181 		(smu_version >= 0x41e2b))
1182 		/**
1183 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1184 		*/
1185 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1186 	else
1187 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1188 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1189 	if (err)
1190 		goto out;
1191 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1192 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1193 
1194 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1195 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1196 	if (version_major == 2 && version_minor == 1)
1197 		adev->gfx.rlc.is_rlc_v2_1 = true;
1198 
1199 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1200 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1201 	adev->gfx.rlc.save_and_restore_offset =
1202 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1203 	adev->gfx.rlc.clear_state_descriptor_offset =
1204 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1205 	adev->gfx.rlc.avail_scratch_ram_locations =
1206 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1207 	adev->gfx.rlc.reg_restore_list_size =
1208 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1209 	adev->gfx.rlc.reg_list_format_start =
1210 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1211 	adev->gfx.rlc.reg_list_format_separate_start =
1212 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1213 	adev->gfx.rlc.starting_offsets_start =
1214 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1215 	adev->gfx.rlc.reg_list_format_size_bytes =
1216 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1217 	adev->gfx.rlc.reg_list_size_bytes =
1218 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1219 	adev->gfx.rlc.register_list_format =
1220 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1221 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1222 	if (!adev->gfx.rlc.register_list_format) {
1223 		err = -ENOMEM;
1224 		goto out;
1225 	}
1226 
1227 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1228 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1229 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1230 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1231 
1232 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1233 
1234 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1235 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1236 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1237 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1238 
1239 	if (adev->gfx.rlc.is_rlc_v2_1)
1240 		gfx_v9_0_init_rlc_ext_microcode(adev);
1241 
1242 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1243 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1244 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1245 		info->fw = adev->gfx.rlc_fw;
1246 		header = (const struct common_firmware_header *)info->fw->data;
1247 		adev->firmware.fw_size +=
1248 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1249 
1250 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1251 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1252 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1253 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1254 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1255 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1256 			info->fw = adev->gfx.rlc_fw;
1257 			adev->firmware.fw_size +=
1258 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1259 
1260 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1261 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1262 			info->fw = adev->gfx.rlc_fw;
1263 			adev->firmware.fw_size +=
1264 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1265 
1266 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1267 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1268 			info->fw = adev->gfx.rlc_fw;
1269 			adev->firmware.fw_size +=
1270 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1271 		}
1272 	}
1273 
1274 out:
1275 	if (err) {
1276 		dev_err(adev->dev,
1277 			"gfx9: Failed to load firmware \"%s\"\n",
1278 			fw_name);
1279 		release_firmware(adev->gfx.rlc_fw);
1280 		adev->gfx.rlc_fw = NULL;
1281 	}
1282 	return err;
1283 }
1284 
1285 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1286 					  const char *chip_name)
1287 {
1288 	char fw_name[30];
1289 	int err;
1290 	struct amdgpu_firmware_info *info = NULL;
1291 	const struct common_firmware_header *header = NULL;
1292 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1293 
1294 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1295 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1296 	if (err)
1297 		goto out;
1298 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1299 	if (err)
1300 		goto out;
1301 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1302 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1303 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1304 
1305 
1306 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1307 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1308 	if (!err) {
1309 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1310 		if (err)
1311 			goto out;
1312 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1313 		adev->gfx.mec2_fw->data;
1314 		adev->gfx.mec2_fw_version =
1315 		le32_to_cpu(cp_hdr->header.ucode_version);
1316 		adev->gfx.mec2_feature_version =
1317 		le32_to_cpu(cp_hdr->ucode_feature_version);
1318 	} else {
1319 		err = 0;
1320 		adev->gfx.mec2_fw = NULL;
1321 	}
1322 
1323 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1324 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1325 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1326 		info->fw = adev->gfx.mec_fw;
1327 		header = (const struct common_firmware_header *)info->fw->data;
1328 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1329 		adev->firmware.fw_size +=
1330 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1331 
1332 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1333 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1334 		info->fw = adev->gfx.mec_fw;
1335 		adev->firmware.fw_size +=
1336 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1337 
1338 		if (adev->gfx.mec2_fw) {
1339 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1340 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1341 			info->fw = adev->gfx.mec2_fw;
1342 			header = (const struct common_firmware_header *)info->fw->data;
1343 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1344 			adev->firmware.fw_size +=
1345 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1346 
1347 			/* TODO: Determine if MEC2 JT FW loading can be removed
1348 				 for all GFX V9 asic and above */
1349 			if (adev->asic_type != CHIP_ARCTURUS &&
1350 			    adev->asic_type != CHIP_RENOIR) {
1351 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1352 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1353 				info->fw = adev->gfx.mec2_fw;
1354 				adev->firmware.fw_size +=
1355 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1356 					PAGE_SIZE);
1357 			}
1358 		}
1359 	}
1360 
1361 out:
1362 	gfx_v9_0_check_if_need_gfxoff(adev);
1363 	gfx_v9_0_check_fw_write_wait(adev);
1364 	if (err) {
1365 		dev_err(adev->dev,
1366 			"gfx9: Failed to load firmware \"%s\"\n",
1367 			fw_name);
1368 		release_firmware(adev->gfx.mec_fw);
1369 		adev->gfx.mec_fw = NULL;
1370 		release_firmware(adev->gfx.mec2_fw);
1371 		adev->gfx.mec2_fw = NULL;
1372 	}
1373 	return err;
1374 }
1375 
1376 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1377 {
1378 	const char *chip_name;
1379 	int r;
1380 
1381 	DRM_DEBUG("\n");
1382 
1383 	switch (adev->asic_type) {
1384 	case CHIP_VEGA10:
1385 		chip_name = "vega10";
1386 		break;
1387 	case CHIP_VEGA12:
1388 		chip_name = "vega12";
1389 		break;
1390 	case CHIP_VEGA20:
1391 		chip_name = "vega20";
1392 		break;
1393 	case CHIP_RAVEN:
1394 		if (adev->rev_id >= 8)
1395 			chip_name = "raven2";
1396 		else if (adev->pdev->device == 0x15d8)
1397 			chip_name = "picasso";
1398 		else
1399 			chip_name = "raven";
1400 		break;
1401 	case CHIP_ARCTURUS:
1402 		chip_name = "arcturus";
1403 		break;
1404 	case CHIP_RENOIR:
1405 		chip_name = "renoir";
1406 		break;
1407 	default:
1408 		BUG();
1409 	}
1410 
1411 	/* No CPG in Arcturus */
1412 	if (adev->asic_type != CHIP_ARCTURUS) {
1413 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1414 		if (r)
1415 			return r;
1416 	}
1417 
1418 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1419 	if (r)
1420 		return r;
1421 
1422 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1423 	if (r)
1424 		return r;
1425 
1426 	return r;
1427 }
1428 
1429 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1430 {
1431 	u32 count = 0;
1432 	const struct cs_section_def *sect = NULL;
1433 	const struct cs_extent_def *ext = NULL;
1434 
1435 	/* begin clear state */
1436 	count += 2;
1437 	/* context control state */
1438 	count += 3;
1439 
1440 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1441 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1442 			if (sect->id == SECT_CONTEXT)
1443 				count += 2 + ext->reg_count;
1444 			else
1445 				return 0;
1446 		}
1447 	}
1448 
1449 	/* end clear state */
1450 	count += 2;
1451 	/* clear state */
1452 	count += 2;
1453 
1454 	return count;
1455 }
1456 
1457 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1458 				    volatile u32 *buffer)
1459 {
1460 	u32 count = 0, i;
1461 	const struct cs_section_def *sect = NULL;
1462 	const struct cs_extent_def *ext = NULL;
1463 
1464 	if (adev->gfx.rlc.cs_data == NULL)
1465 		return;
1466 	if (buffer == NULL)
1467 		return;
1468 
1469 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1470 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1471 
1472 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1473 	buffer[count++] = cpu_to_le32(0x80000000);
1474 	buffer[count++] = cpu_to_le32(0x80000000);
1475 
1476 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1477 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1478 			if (sect->id == SECT_CONTEXT) {
1479 				buffer[count++] =
1480 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1481 				buffer[count++] = cpu_to_le32(ext->reg_index -
1482 						PACKET3_SET_CONTEXT_REG_START);
1483 				for (i = 0; i < ext->reg_count; i++)
1484 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1485 			} else {
1486 				return;
1487 			}
1488 		}
1489 	}
1490 
1491 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1492 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1493 
1494 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1495 	buffer[count++] = cpu_to_le32(0);
1496 }
1497 
1498 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1499 {
1500 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1501 	uint32_t pg_always_on_cu_num = 2;
1502 	uint32_t always_on_cu_num;
1503 	uint32_t i, j, k;
1504 	uint32_t mask, cu_bitmap, counter;
1505 
1506 	if (adev->flags & AMD_IS_APU)
1507 		always_on_cu_num = 4;
1508 	else if (adev->asic_type == CHIP_VEGA12)
1509 		always_on_cu_num = 8;
1510 	else
1511 		always_on_cu_num = 12;
1512 
1513 	mutex_lock(&adev->grbm_idx_mutex);
1514 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1515 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1516 			mask = 1;
1517 			cu_bitmap = 0;
1518 			counter = 0;
1519 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1520 
1521 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1522 				if (cu_info->bitmap[i][j] & mask) {
1523 					if (counter == pg_always_on_cu_num)
1524 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1525 					if (counter < always_on_cu_num)
1526 						cu_bitmap |= mask;
1527 					else
1528 						break;
1529 					counter++;
1530 				}
1531 				mask <<= 1;
1532 			}
1533 
1534 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1535 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1536 		}
1537 	}
1538 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1539 	mutex_unlock(&adev->grbm_idx_mutex);
1540 }
1541 
1542 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1543 {
1544 	uint32_t data;
1545 
1546 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1547 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1548 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1549 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1550 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1551 
1552 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1553 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1554 
1555 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1556 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1557 
1558 	mutex_lock(&adev->grbm_idx_mutex);
1559 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1560 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1561 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1562 
1563 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1564 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1565 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1566 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1567 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1568 
1569 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1570 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1571 	data &= 0x0000FFFF;
1572 	data |= 0x00C00000;
1573 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1574 
1575 	/*
1576 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1577 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1578 	 */
1579 
1580 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1581 	 * but used for RLC_LB_CNTL configuration */
1582 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1583 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1584 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1585 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1586 	mutex_unlock(&adev->grbm_idx_mutex);
1587 
1588 	gfx_v9_0_init_always_on_cu_mask(adev);
1589 }
1590 
1591 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1592 {
1593 	uint32_t data;
1594 
1595 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1596 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1597 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1598 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1599 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1600 
1601 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1602 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1603 
1604 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1605 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1606 
1607 	mutex_lock(&adev->grbm_idx_mutex);
1608 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1609 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1610 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1611 
1612 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1613 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1614 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1615 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1616 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1617 
1618 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1619 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1620 	data &= 0x0000FFFF;
1621 	data |= 0x00C00000;
1622 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1623 
1624 	/*
1625 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1626 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1627 	 */
1628 
1629 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1630 	 * but used for RLC_LB_CNTL configuration */
1631 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1632 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1633 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1634 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1635 	mutex_unlock(&adev->grbm_idx_mutex);
1636 
1637 	gfx_v9_0_init_always_on_cu_mask(adev);
1638 }
1639 
1640 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1641 {
1642 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1643 }
1644 
1645 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1646 {
1647 	return 5;
1648 }
1649 
1650 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1651 {
1652 	const struct cs_section_def *cs_data;
1653 	int r;
1654 
1655 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1656 
1657 	cs_data = adev->gfx.rlc.cs_data;
1658 
1659 	if (cs_data) {
1660 		/* init clear state block */
1661 		r = amdgpu_gfx_rlc_init_csb(adev);
1662 		if (r)
1663 			return r;
1664 	}
1665 
1666 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1667 		/* TODO: double check the cp_table_size for RV */
1668 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1669 		r = amdgpu_gfx_rlc_init_cpt(adev);
1670 		if (r)
1671 			return r;
1672 	}
1673 
1674 	switch (adev->asic_type) {
1675 	case CHIP_RAVEN:
1676 		gfx_v9_0_init_lbpw(adev);
1677 		break;
1678 	case CHIP_VEGA20:
1679 		gfx_v9_4_init_lbpw(adev);
1680 		break;
1681 	default:
1682 		break;
1683 	}
1684 
1685 	return 0;
1686 }
1687 
1688 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1689 {
1690 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1691 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1692 }
1693 
1694 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1695 {
1696 	int r;
1697 	u32 *hpd;
1698 	const __le32 *fw_data;
1699 	unsigned fw_size;
1700 	u32 *fw;
1701 	size_t mec_hpd_size;
1702 
1703 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1704 
1705 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1706 
1707 	/* take ownership of the relevant compute queues */
1708 	amdgpu_gfx_compute_queue_acquire(adev);
1709 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1710 
1711 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1712 				      AMDGPU_GEM_DOMAIN_VRAM,
1713 				      &adev->gfx.mec.hpd_eop_obj,
1714 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1715 				      (void **)&hpd);
1716 	if (r) {
1717 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1718 		gfx_v9_0_mec_fini(adev);
1719 		return r;
1720 	}
1721 
1722 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1723 
1724 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1725 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1726 
1727 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1728 
1729 	fw_data = (const __le32 *)
1730 		(adev->gfx.mec_fw->data +
1731 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1732 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1733 
1734 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1735 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1736 				      &adev->gfx.mec.mec_fw_obj,
1737 				      &adev->gfx.mec.mec_fw_gpu_addr,
1738 				      (void **)&fw);
1739 	if (r) {
1740 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1741 		gfx_v9_0_mec_fini(adev);
1742 		return r;
1743 	}
1744 
1745 	memcpy(fw, fw_data, fw_size);
1746 
1747 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1748 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1749 
1750 	return 0;
1751 }
1752 
1753 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1754 {
1755 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1756 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1757 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1758 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1759 		(SQ_IND_INDEX__FORCE_READ_MASK));
1760 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1761 }
1762 
1763 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1764 			   uint32_t wave, uint32_t thread,
1765 			   uint32_t regno, uint32_t num, uint32_t *out)
1766 {
1767 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1768 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1769 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1770 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1771 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1772 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1773 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1774 	while (num--)
1775 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1776 }
1777 
1778 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1779 {
1780 	/* type 1 wave data */
1781 	dst[(*no_fields)++] = 1;
1782 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1783 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1784 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1790 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1796 }
1797 
1798 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1799 				     uint32_t wave, uint32_t start,
1800 				     uint32_t size, uint32_t *dst)
1801 {
1802 	wave_read_regs(
1803 		adev, simd, wave, 0,
1804 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1805 }
1806 
1807 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1808 				     uint32_t wave, uint32_t thread,
1809 				     uint32_t start, uint32_t size,
1810 				     uint32_t *dst)
1811 {
1812 	wave_read_regs(
1813 		adev, simd, wave, thread,
1814 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1815 }
1816 
1817 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1818 				  u32 me, u32 pipe, u32 q, u32 vm)
1819 {
1820 	soc15_grbm_select(adev, me, pipe, q, vm);
1821 }
1822 
1823 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1824 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1825 	.select_se_sh = &gfx_v9_0_select_se_sh,
1826 	.read_wave_data = &gfx_v9_0_read_wave_data,
1827 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1828 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1829 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1830 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1831 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1832 };
1833 
1834 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1835 {
1836 	u32 gb_addr_config;
1837 	int err;
1838 
1839 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1840 
1841 	switch (adev->asic_type) {
1842 	case CHIP_VEGA10:
1843 		adev->gfx.config.max_hw_contexts = 8;
1844 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1845 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1846 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1847 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1848 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1849 		break;
1850 	case CHIP_VEGA12:
1851 		adev->gfx.config.max_hw_contexts = 8;
1852 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1853 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1854 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1855 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1856 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1857 		DRM_INFO("fix gfx.config for vega12\n");
1858 		break;
1859 	case CHIP_VEGA20:
1860 		adev->gfx.config.max_hw_contexts = 8;
1861 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1862 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1863 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1864 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1865 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1866 		gb_addr_config &= ~0xf3e777ff;
1867 		gb_addr_config |= 0x22014042;
1868 		/* check vbios table if gpu info is not available */
1869 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1870 		if (err)
1871 			return err;
1872 		break;
1873 	case CHIP_RAVEN:
1874 		adev->gfx.config.max_hw_contexts = 8;
1875 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879 		if (adev->rev_id >= 8)
1880 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1881 		else
1882 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1883 		break;
1884 	case CHIP_ARCTURUS:
1885 		adev->gfx.config.max_hw_contexts = 8;
1886 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1887 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1888 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1889 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1890 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1891 		gb_addr_config &= ~0xf3e777ff;
1892 		gb_addr_config |= 0x22014042;
1893 		break;
1894 	case CHIP_RENOIR:
1895 		adev->gfx.config.max_hw_contexts = 8;
1896 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1897 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1898 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1899 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1900 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1901 		gb_addr_config &= ~0xf3e777ff;
1902 		gb_addr_config |= 0x22010042;
1903 		break;
1904 	default:
1905 		BUG();
1906 		break;
1907 	}
1908 
1909 	adev->gfx.config.gb_addr_config = gb_addr_config;
1910 
1911 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1912 			REG_GET_FIELD(
1913 					adev->gfx.config.gb_addr_config,
1914 					GB_ADDR_CONFIG,
1915 					NUM_PIPES);
1916 
1917 	adev->gfx.config.max_tile_pipes =
1918 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1919 
1920 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1921 			REG_GET_FIELD(
1922 					adev->gfx.config.gb_addr_config,
1923 					GB_ADDR_CONFIG,
1924 					NUM_BANKS);
1925 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1926 			REG_GET_FIELD(
1927 					adev->gfx.config.gb_addr_config,
1928 					GB_ADDR_CONFIG,
1929 					MAX_COMPRESSED_FRAGS);
1930 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1931 			REG_GET_FIELD(
1932 					adev->gfx.config.gb_addr_config,
1933 					GB_ADDR_CONFIG,
1934 					NUM_RB_PER_SE);
1935 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1936 			REG_GET_FIELD(
1937 					adev->gfx.config.gb_addr_config,
1938 					GB_ADDR_CONFIG,
1939 					NUM_SHADER_ENGINES);
1940 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1941 			REG_GET_FIELD(
1942 					adev->gfx.config.gb_addr_config,
1943 					GB_ADDR_CONFIG,
1944 					PIPE_INTERLEAVE_SIZE));
1945 
1946 	return 0;
1947 }
1948 
1949 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1950 				      int mec, int pipe, int queue)
1951 {
1952 	int r;
1953 	unsigned irq_type;
1954 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1955 
1956 	ring = &adev->gfx.compute_ring[ring_id];
1957 
1958 	/* mec0 is me1 */
1959 	ring->me = mec + 1;
1960 	ring->pipe = pipe;
1961 	ring->queue = queue;
1962 
1963 	ring->ring_obj = NULL;
1964 	ring->use_doorbell = true;
1965 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1966 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1967 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1968 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1969 
1970 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1971 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1972 		+ ring->pipe;
1973 
1974 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1975 	r = amdgpu_ring_init(adev, ring, 1024,
1976 			     &adev->gfx.eop_irq, irq_type);
1977 	if (r)
1978 		return r;
1979 
1980 
1981 	return 0;
1982 }
1983 
1984 static int gfx_v9_0_sw_init(void *handle)
1985 {
1986 	int i, j, k, r, ring_id;
1987 	struct amdgpu_ring *ring;
1988 	struct amdgpu_kiq *kiq;
1989 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1990 
1991 	switch (adev->asic_type) {
1992 	case CHIP_VEGA10:
1993 	case CHIP_VEGA12:
1994 	case CHIP_VEGA20:
1995 	case CHIP_RAVEN:
1996 	case CHIP_ARCTURUS:
1997 	case CHIP_RENOIR:
1998 		adev->gfx.mec.num_mec = 2;
1999 		break;
2000 	default:
2001 		adev->gfx.mec.num_mec = 1;
2002 		break;
2003 	}
2004 
2005 	adev->gfx.mec.num_pipe_per_mec = 4;
2006 	adev->gfx.mec.num_queue_per_pipe = 8;
2007 
2008 	/* EOP Event */
2009 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2010 	if (r)
2011 		return r;
2012 
2013 	/* Privileged reg */
2014 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2015 			      &adev->gfx.priv_reg_irq);
2016 	if (r)
2017 		return r;
2018 
2019 	/* Privileged inst */
2020 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2021 			      &adev->gfx.priv_inst_irq);
2022 	if (r)
2023 		return r;
2024 
2025 	/* ECC error */
2026 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2027 			      &adev->gfx.cp_ecc_error_irq);
2028 	if (r)
2029 		return r;
2030 
2031 	/* FUE error */
2032 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2033 			      &adev->gfx.cp_ecc_error_irq);
2034 	if (r)
2035 		return r;
2036 
2037 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2038 
2039 	gfx_v9_0_scratch_init(adev);
2040 
2041 	r = gfx_v9_0_init_microcode(adev);
2042 	if (r) {
2043 		DRM_ERROR("Failed to load gfx firmware!\n");
2044 		return r;
2045 	}
2046 
2047 	r = adev->gfx.rlc.funcs->init(adev);
2048 	if (r) {
2049 		DRM_ERROR("Failed to init rlc BOs!\n");
2050 		return r;
2051 	}
2052 
2053 	r = gfx_v9_0_mec_init(adev);
2054 	if (r) {
2055 		DRM_ERROR("Failed to init MEC BOs!\n");
2056 		return r;
2057 	}
2058 
2059 	/* set up the gfx ring */
2060 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2061 		ring = &adev->gfx.gfx_ring[i];
2062 		ring->ring_obj = NULL;
2063 		if (!i)
2064 			sprintf(ring->name, "gfx");
2065 		else
2066 			sprintf(ring->name, "gfx_%d", i);
2067 		ring->use_doorbell = true;
2068 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2069 		r = amdgpu_ring_init(adev, ring, 1024,
2070 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2071 		if (r)
2072 			return r;
2073 	}
2074 
2075 	/* set up the compute queues - allocate horizontally across pipes */
2076 	ring_id = 0;
2077 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2078 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2079 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2080 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2081 					continue;
2082 
2083 				r = gfx_v9_0_compute_ring_init(adev,
2084 							       ring_id,
2085 							       i, k, j);
2086 				if (r)
2087 					return r;
2088 
2089 				ring_id++;
2090 			}
2091 		}
2092 	}
2093 
2094 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2095 	if (r) {
2096 		DRM_ERROR("Failed to init KIQ BOs!\n");
2097 		return r;
2098 	}
2099 
2100 	kiq = &adev->gfx.kiq;
2101 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2102 	if (r)
2103 		return r;
2104 
2105 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2106 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2107 	if (r)
2108 		return r;
2109 
2110 	adev->gfx.ce_ram_size = 0x8000;
2111 
2112 	r = gfx_v9_0_gpu_early_init(adev);
2113 	if (r)
2114 		return r;
2115 
2116 	return 0;
2117 }
2118 
2119 
2120 static int gfx_v9_0_sw_fini(void *handle)
2121 {
2122 	int i;
2123 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2124 
2125 	amdgpu_gfx_ras_fini(adev);
2126 
2127 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2128 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2129 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2130 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2131 
2132 	amdgpu_gfx_mqd_sw_fini(adev);
2133 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2134 	amdgpu_gfx_kiq_fini(adev);
2135 
2136 	gfx_v9_0_mec_fini(adev);
2137 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2138 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2139 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2140 				&adev->gfx.rlc.cp_table_gpu_addr,
2141 				(void **)&adev->gfx.rlc.cp_table_ptr);
2142 	}
2143 	gfx_v9_0_free_microcode(adev);
2144 
2145 	return 0;
2146 }
2147 
2148 
2149 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2150 {
2151 	/* TODO */
2152 }
2153 
2154 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2155 {
2156 	u32 data;
2157 
2158 	if (instance == 0xffffffff)
2159 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2160 	else
2161 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2162 
2163 	if (se_num == 0xffffffff)
2164 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2165 	else
2166 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2167 
2168 	if (sh_num == 0xffffffff)
2169 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2170 	else
2171 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2172 
2173 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2174 }
2175 
2176 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2177 {
2178 	u32 data, mask;
2179 
2180 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2181 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2182 
2183 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2184 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2185 
2186 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2187 					 adev->gfx.config.max_sh_per_se);
2188 
2189 	return (~data) & mask;
2190 }
2191 
2192 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2193 {
2194 	int i, j;
2195 	u32 data;
2196 	u32 active_rbs = 0;
2197 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2198 					adev->gfx.config.max_sh_per_se;
2199 
2200 	mutex_lock(&adev->grbm_idx_mutex);
2201 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2202 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2203 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2204 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2205 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2206 					       rb_bitmap_width_per_sh);
2207 		}
2208 	}
2209 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2210 	mutex_unlock(&adev->grbm_idx_mutex);
2211 
2212 	adev->gfx.config.backend_enable_mask = active_rbs;
2213 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2214 }
2215 
2216 #define DEFAULT_SH_MEM_BASES	(0x6000)
2217 #define FIRST_COMPUTE_VMID	(8)
2218 #define LAST_COMPUTE_VMID	(16)
2219 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2220 {
2221 	int i;
2222 	uint32_t sh_mem_config;
2223 	uint32_t sh_mem_bases;
2224 
2225 	/*
2226 	 * Configure apertures:
2227 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2228 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2229 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2230 	 */
2231 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2232 
2233 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2234 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2235 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2236 
2237 	mutex_lock(&adev->srbm_mutex);
2238 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2239 		soc15_grbm_select(adev, 0, 0, 0, i);
2240 		/* CP and shaders */
2241 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2242 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2243 	}
2244 	soc15_grbm_select(adev, 0, 0, 0, 0);
2245 	mutex_unlock(&adev->srbm_mutex);
2246 
2247 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2248 	   acccess. These should be enabled by FW for target VMIDs. */
2249 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2250 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2251 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2252 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2253 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2254 	}
2255 }
2256 
2257 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2258 {
2259 	int vmid;
2260 
2261 	/*
2262 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2263 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2264 	 * the driver can enable them for graphics. VMID0 should maintain
2265 	 * access so that HWS firmware can save/restore entries.
2266 	 */
2267 	for (vmid = 1; vmid < 16; vmid++) {
2268 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2269 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2270 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2271 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2272 	}
2273 }
2274 
2275 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2276 {
2277 	u32 tmp;
2278 	int i;
2279 
2280 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2281 
2282 	gfx_v9_0_tiling_mode_table_init(adev);
2283 
2284 	gfx_v9_0_setup_rb(adev);
2285 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2286 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2287 
2288 	/* XXX SH_MEM regs */
2289 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2290 	mutex_lock(&adev->srbm_mutex);
2291 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2292 		soc15_grbm_select(adev, 0, 0, 0, i);
2293 		/* CP and shaders */
2294 		if (i == 0) {
2295 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2296 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2297 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2298 					    !!amdgpu_noretry);
2299 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2300 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2301 		} else {
2302 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2303 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2304 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2305 					    !!amdgpu_noretry);
2306 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2307 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2308 				(adev->gmc.private_aperture_start >> 48));
2309 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2310 				(adev->gmc.shared_aperture_start >> 48));
2311 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2312 		}
2313 	}
2314 	soc15_grbm_select(adev, 0, 0, 0, 0);
2315 
2316 	mutex_unlock(&adev->srbm_mutex);
2317 
2318 	gfx_v9_0_init_compute_vmid(adev);
2319 	gfx_v9_0_init_gds_vmid(adev);
2320 }
2321 
2322 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2323 {
2324 	u32 i, j, k;
2325 	u32 mask;
2326 
2327 	mutex_lock(&adev->grbm_idx_mutex);
2328 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2329 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2330 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2331 			for (k = 0; k < adev->usec_timeout; k++) {
2332 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2333 					break;
2334 				udelay(1);
2335 			}
2336 			if (k == adev->usec_timeout) {
2337 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2338 						      0xffffffff, 0xffffffff);
2339 				mutex_unlock(&adev->grbm_idx_mutex);
2340 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2341 					 i, j);
2342 				return;
2343 			}
2344 		}
2345 	}
2346 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2347 	mutex_unlock(&adev->grbm_idx_mutex);
2348 
2349 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2350 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2351 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2352 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2353 	for (k = 0; k < adev->usec_timeout; k++) {
2354 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2355 			break;
2356 		udelay(1);
2357 	}
2358 }
2359 
2360 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2361 					       bool enable)
2362 {
2363 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2364 
2365 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2366 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2367 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2368 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2369 
2370 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2371 }
2372 
2373 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2374 {
2375 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2376 	/* csib */
2377 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2378 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2379 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2380 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2381 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2382 			adev->gfx.rlc.clear_state_size);
2383 }
2384 
2385 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2386 				int indirect_offset,
2387 				int list_size,
2388 				int *unique_indirect_regs,
2389 				int unique_indirect_reg_count,
2390 				int *indirect_start_offsets,
2391 				int *indirect_start_offsets_count,
2392 				int max_start_offsets_count)
2393 {
2394 	int idx;
2395 
2396 	for (; indirect_offset < list_size; indirect_offset++) {
2397 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2398 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2399 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2400 
2401 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2402 			indirect_offset += 2;
2403 
2404 			/* look for the matching indice */
2405 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2406 				if (unique_indirect_regs[idx] ==
2407 					register_list_format[indirect_offset] ||
2408 					!unique_indirect_regs[idx])
2409 					break;
2410 			}
2411 
2412 			BUG_ON(idx >= unique_indirect_reg_count);
2413 
2414 			if (!unique_indirect_regs[idx])
2415 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2416 
2417 			indirect_offset++;
2418 		}
2419 	}
2420 }
2421 
2422 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2423 {
2424 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2425 	int unique_indirect_reg_count = 0;
2426 
2427 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2428 	int indirect_start_offsets_count = 0;
2429 
2430 	int list_size = 0;
2431 	int i = 0, j = 0;
2432 	u32 tmp = 0;
2433 
2434 	u32 *register_list_format =
2435 		kmemdup(adev->gfx.rlc.register_list_format,
2436 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2437 	if (!register_list_format)
2438 		return -ENOMEM;
2439 
2440 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2441 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2442 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2443 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2444 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2445 				    unique_indirect_regs,
2446 				    unique_indirect_reg_count,
2447 				    indirect_start_offsets,
2448 				    &indirect_start_offsets_count,
2449 				    ARRAY_SIZE(indirect_start_offsets));
2450 
2451 	/* enable auto inc in case it is disabled */
2452 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2453 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2454 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2455 
2456 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2457 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2458 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2459 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2460 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2461 			adev->gfx.rlc.register_restore[i]);
2462 
2463 	/* load indirect register */
2464 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2465 		adev->gfx.rlc.reg_list_format_start);
2466 
2467 	/* direct register portion */
2468 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2469 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2470 			register_list_format[i]);
2471 
2472 	/* indirect register portion */
2473 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2474 		if (register_list_format[i] == 0xFFFFFFFF) {
2475 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2476 			continue;
2477 		}
2478 
2479 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2480 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2481 
2482 		for (j = 0; j < unique_indirect_reg_count; j++) {
2483 			if (register_list_format[i] == unique_indirect_regs[j]) {
2484 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2485 				break;
2486 			}
2487 		}
2488 
2489 		BUG_ON(j >= unique_indirect_reg_count);
2490 
2491 		i++;
2492 	}
2493 
2494 	/* set save/restore list size */
2495 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2496 	list_size = list_size >> 1;
2497 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2498 		adev->gfx.rlc.reg_restore_list_size);
2499 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2500 
2501 	/* write the starting offsets to RLC scratch ram */
2502 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2503 		adev->gfx.rlc.starting_offsets_start);
2504 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2505 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2506 		       indirect_start_offsets[i]);
2507 
2508 	/* load unique indirect regs*/
2509 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2510 		if (unique_indirect_regs[i] != 0) {
2511 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2512 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2513 			       unique_indirect_regs[i] & 0x3FFFF);
2514 
2515 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2516 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2517 			       unique_indirect_regs[i] >> 20);
2518 		}
2519 	}
2520 
2521 	kfree(register_list_format);
2522 	return 0;
2523 }
2524 
2525 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2526 {
2527 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2528 }
2529 
2530 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2531 					     bool enable)
2532 {
2533 	uint32_t data = 0;
2534 	uint32_t default_data = 0;
2535 
2536 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2537 	if (enable == true) {
2538 		/* enable GFXIP control over CGPG */
2539 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2540 		if(default_data != data)
2541 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2542 
2543 		/* update status */
2544 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2545 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2546 		if(default_data != data)
2547 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2548 	} else {
2549 		/* restore GFXIP control over GCPG */
2550 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2551 		if(default_data != data)
2552 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2553 	}
2554 }
2555 
2556 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2557 {
2558 	uint32_t data = 0;
2559 
2560 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2561 			      AMD_PG_SUPPORT_GFX_SMG |
2562 			      AMD_PG_SUPPORT_GFX_DMG)) {
2563 		/* init IDLE_POLL_COUNT = 60 */
2564 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2565 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2566 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2567 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2568 
2569 		/* init RLC PG Delay */
2570 		data = 0;
2571 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2572 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2573 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2574 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2575 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2576 
2577 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2578 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2579 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2580 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2581 
2582 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2583 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2584 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2585 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2586 
2587 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2588 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2589 
2590 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2591 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2592 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2593 
2594 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2595 	}
2596 }
2597 
2598 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2599 						bool enable)
2600 {
2601 	uint32_t data = 0;
2602 	uint32_t default_data = 0;
2603 
2604 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2605 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2606 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2607 			     enable ? 1 : 0);
2608 	if (default_data != data)
2609 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2610 }
2611 
2612 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2613 						bool enable)
2614 {
2615 	uint32_t data = 0;
2616 	uint32_t default_data = 0;
2617 
2618 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2619 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2620 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2621 			     enable ? 1 : 0);
2622 	if(default_data != data)
2623 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2624 }
2625 
2626 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2627 					bool enable)
2628 {
2629 	uint32_t data = 0;
2630 	uint32_t default_data = 0;
2631 
2632 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2633 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2634 			     CP_PG_DISABLE,
2635 			     enable ? 0 : 1);
2636 	if(default_data != data)
2637 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2638 }
2639 
2640 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2641 						bool enable)
2642 {
2643 	uint32_t data, default_data;
2644 
2645 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2646 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2647 			     GFX_POWER_GATING_ENABLE,
2648 			     enable ? 1 : 0);
2649 	if(default_data != data)
2650 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2651 }
2652 
2653 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2654 						bool enable)
2655 {
2656 	uint32_t data, default_data;
2657 
2658 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2659 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2660 			     GFX_PIPELINE_PG_ENABLE,
2661 			     enable ? 1 : 0);
2662 	if(default_data != data)
2663 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2664 
2665 	if (!enable)
2666 		/* read any GFX register to wake up GFX */
2667 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2668 }
2669 
2670 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2671 						       bool enable)
2672 {
2673 	uint32_t data, default_data;
2674 
2675 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2676 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2677 			     STATIC_PER_CU_PG_ENABLE,
2678 			     enable ? 1 : 0);
2679 	if(default_data != data)
2680 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2681 }
2682 
2683 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2684 						bool enable)
2685 {
2686 	uint32_t data, default_data;
2687 
2688 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2689 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2690 			     DYN_PER_CU_PG_ENABLE,
2691 			     enable ? 1 : 0);
2692 	if(default_data != data)
2693 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2694 }
2695 
2696 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2697 {
2698 	gfx_v9_0_init_csb(adev);
2699 
2700 	/*
2701 	 * Rlc save restore list is workable since v2_1.
2702 	 * And it's needed by gfxoff feature.
2703 	 */
2704 	if (adev->gfx.rlc.is_rlc_v2_1) {
2705 		if (adev->asic_type == CHIP_VEGA12 ||
2706 		    (adev->asic_type == CHIP_RAVEN &&
2707 		     adev->rev_id >= 8))
2708 			gfx_v9_1_init_rlc_save_restore_list(adev);
2709 		gfx_v9_0_enable_save_restore_machine(adev);
2710 	}
2711 
2712 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2713 			      AMD_PG_SUPPORT_GFX_SMG |
2714 			      AMD_PG_SUPPORT_GFX_DMG |
2715 			      AMD_PG_SUPPORT_CP |
2716 			      AMD_PG_SUPPORT_GDS |
2717 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2718 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2719 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2720 		gfx_v9_0_init_gfx_power_gating(adev);
2721 	}
2722 }
2723 
2724 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2725 {
2726 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2727 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2728 	gfx_v9_0_wait_for_rlc_serdes(adev);
2729 }
2730 
2731 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2732 {
2733 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2734 	udelay(50);
2735 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2736 	udelay(50);
2737 }
2738 
2739 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2740 {
2741 #ifdef AMDGPU_RLC_DEBUG_RETRY
2742 	u32 rlc_ucode_ver;
2743 #endif
2744 
2745 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2746 	udelay(50);
2747 
2748 	/* carrizo do enable cp interrupt after cp inited */
2749 	if (!(adev->flags & AMD_IS_APU)) {
2750 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2751 		udelay(50);
2752 	}
2753 
2754 #ifdef AMDGPU_RLC_DEBUG_RETRY
2755 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2756 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2757 	if(rlc_ucode_ver == 0x108) {
2758 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2759 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2760 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2761 		 * default is 0x9C4 to create a 100us interval */
2762 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2763 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2764 		 * to disable the page fault retry interrupts, default is
2765 		 * 0x100 (256) */
2766 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2767 	}
2768 #endif
2769 }
2770 
2771 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2772 {
2773 	const struct rlc_firmware_header_v2_0 *hdr;
2774 	const __le32 *fw_data;
2775 	unsigned i, fw_size;
2776 
2777 	if (!adev->gfx.rlc_fw)
2778 		return -EINVAL;
2779 
2780 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2781 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2782 
2783 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2784 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2785 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2786 
2787 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2788 			RLCG_UCODE_LOADING_START_ADDRESS);
2789 	for (i = 0; i < fw_size; i++)
2790 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2791 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2792 
2793 	return 0;
2794 }
2795 
2796 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2797 {
2798 	int r;
2799 
2800 	if (amdgpu_sriov_vf(adev)) {
2801 		gfx_v9_0_init_csb(adev);
2802 		return 0;
2803 	}
2804 
2805 	adev->gfx.rlc.funcs->stop(adev);
2806 
2807 	/* disable CG */
2808 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2809 
2810 	gfx_v9_0_init_pg(adev);
2811 
2812 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2813 		/* legacy rlc firmware loading */
2814 		r = gfx_v9_0_rlc_load_microcode(adev);
2815 		if (r)
2816 			return r;
2817 	}
2818 
2819 	switch (adev->asic_type) {
2820 	case CHIP_RAVEN:
2821 		if (amdgpu_lbpw == 0)
2822 			gfx_v9_0_enable_lbpw(adev, false);
2823 		else
2824 			gfx_v9_0_enable_lbpw(adev, true);
2825 		break;
2826 	case CHIP_VEGA20:
2827 		if (amdgpu_lbpw > 0)
2828 			gfx_v9_0_enable_lbpw(adev, true);
2829 		else
2830 			gfx_v9_0_enable_lbpw(adev, false);
2831 		break;
2832 	default:
2833 		break;
2834 	}
2835 
2836 	adev->gfx.rlc.funcs->start(adev);
2837 
2838 	return 0;
2839 }
2840 
2841 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2842 {
2843 	int i;
2844 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2845 
2846 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2847 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2848 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2849 	if (!enable) {
2850 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2851 			adev->gfx.gfx_ring[i].sched.ready = false;
2852 	}
2853 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2854 	udelay(50);
2855 }
2856 
2857 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2858 {
2859 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2860 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2861 	const struct gfx_firmware_header_v1_0 *me_hdr;
2862 	const __le32 *fw_data;
2863 	unsigned i, fw_size;
2864 
2865 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2866 		return -EINVAL;
2867 
2868 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2869 		adev->gfx.pfp_fw->data;
2870 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2871 		adev->gfx.ce_fw->data;
2872 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2873 		adev->gfx.me_fw->data;
2874 
2875 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2876 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2877 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2878 
2879 	gfx_v9_0_cp_gfx_enable(adev, false);
2880 
2881 	/* PFP */
2882 	fw_data = (const __le32 *)
2883 		(adev->gfx.pfp_fw->data +
2884 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2885 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2886 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2887 	for (i = 0; i < fw_size; i++)
2888 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2889 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2890 
2891 	/* CE */
2892 	fw_data = (const __le32 *)
2893 		(adev->gfx.ce_fw->data +
2894 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2895 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2896 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2897 	for (i = 0; i < fw_size; i++)
2898 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2899 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2900 
2901 	/* ME */
2902 	fw_data = (const __le32 *)
2903 		(adev->gfx.me_fw->data +
2904 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2905 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2906 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2907 	for (i = 0; i < fw_size; i++)
2908 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2909 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2910 
2911 	return 0;
2912 }
2913 
2914 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2915 {
2916 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2917 	const struct cs_section_def *sect = NULL;
2918 	const struct cs_extent_def *ext = NULL;
2919 	int r, i, tmp;
2920 
2921 	/* init the CP */
2922 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2923 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2924 
2925 	gfx_v9_0_cp_gfx_enable(adev, true);
2926 
2927 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2928 	if (r) {
2929 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2930 		return r;
2931 	}
2932 
2933 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2934 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2935 
2936 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2937 	amdgpu_ring_write(ring, 0x80000000);
2938 	amdgpu_ring_write(ring, 0x80000000);
2939 
2940 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2941 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2942 			if (sect->id == SECT_CONTEXT) {
2943 				amdgpu_ring_write(ring,
2944 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2945 					       ext->reg_count));
2946 				amdgpu_ring_write(ring,
2947 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2948 				for (i = 0; i < ext->reg_count; i++)
2949 					amdgpu_ring_write(ring, ext->extent[i]);
2950 			}
2951 		}
2952 	}
2953 
2954 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2955 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2956 
2957 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2958 	amdgpu_ring_write(ring, 0);
2959 
2960 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2961 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2962 	amdgpu_ring_write(ring, 0x8000);
2963 	amdgpu_ring_write(ring, 0x8000);
2964 
2965 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2966 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2967 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2968 	amdgpu_ring_write(ring, tmp);
2969 	amdgpu_ring_write(ring, 0);
2970 
2971 	amdgpu_ring_commit(ring);
2972 
2973 	return 0;
2974 }
2975 
2976 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2977 {
2978 	struct amdgpu_ring *ring;
2979 	u32 tmp;
2980 	u32 rb_bufsz;
2981 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2982 
2983 	/* Set the write pointer delay */
2984 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2985 
2986 	/* set the RB to use vmid 0 */
2987 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2988 
2989 	/* Set ring buffer size */
2990 	ring = &adev->gfx.gfx_ring[0];
2991 	rb_bufsz = order_base_2(ring->ring_size / 8);
2992 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2993 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2994 #ifdef __BIG_ENDIAN
2995 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2996 #endif
2997 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2998 
2999 	/* Initialize the ring buffer's write pointers */
3000 	ring->wptr = 0;
3001 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3002 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3003 
3004 	/* set the wb address wether it's enabled or not */
3005 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3006 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3007 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3008 
3009 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3010 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3011 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3012 
3013 	mdelay(1);
3014 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3015 
3016 	rb_addr = ring->gpu_addr >> 8;
3017 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3018 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3019 
3020 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3021 	if (ring->use_doorbell) {
3022 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3023 				    DOORBELL_OFFSET, ring->doorbell_index);
3024 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3025 				    DOORBELL_EN, 1);
3026 	} else {
3027 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3028 	}
3029 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3030 
3031 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3032 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3033 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3034 
3035 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3036 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3037 
3038 
3039 	/* start the ring */
3040 	gfx_v9_0_cp_gfx_start(adev);
3041 	ring->sched.ready = true;
3042 
3043 	return 0;
3044 }
3045 
3046 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3047 {
3048 	int i;
3049 
3050 	if (enable) {
3051 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3052 	} else {
3053 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3054 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3055 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3056 			adev->gfx.compute_ring[i].sched.ready = false;
3057 		adev->gfx.kiq.ring.sched.ready = false;
3058 	}
3059 	udelay(50);
3060 }
3061 
3062 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3063 {
3064 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3065 	const __le32 *fw_data;
3066 	unsigned i;
3067 	u32 tmp;
3068 
3069 	if (!adev->gfx.mec_fw)
3070 		return -EINVAL;
3071 
3072 	gfx_v9_0_cp_compute_enable(adev, false);
3073 
3074 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3075 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3076 
3077 	fw_data = (const __le32 *)
3078 		(adev->gfx.mec_fw->data +
3079 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3080 	tmp = 0;
3081 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3082 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3083 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3084 
3085 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3086 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3087 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3088 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3089 
3090 	/* MEC1 */
3091 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3092 			 mec_hdr->jt_offset);
3093 	for (i = 0; i < mec_hdr->jt_size; i++)
3094 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3095 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3096 
3097 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3098 			adev->gfx.mec_fw_version);
3099 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3100 
3101 	return 0;
3102 }
3103 
3104 /* KIQ functions */
3105 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3106 {
3107 	uint32_t tmp;
3108 	struct amdgpu_device *adev = ring->adev;
3109 
3110 	/* tell RLC which is KIQ queue */
3111 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3112 	tmp &= 0xffffff00;
3113 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3114 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3115 	tmp |= 0x80;
3116 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3117 }
3118 
3119 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3120 {
3121 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3122 	uint64_t queue_mask = 0;
3123 	int r, i;
3124 
3125 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3126 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3127 			continue;
3128 
3129 		/* This situation may be hit in the future if a new HW
3130 		 * generation exposes more than 64 queues. If so, the
3131 		 * definition of queue_mask needs updating */
3132 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3133 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3134 			break;
3135 		}
3136 
3137 		queue_mask |= (1ull << i);
3138 	}
3139 
3140 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3141 	if (r) {
3142 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3143 		return r;
3144 	}
3145 
3146 	/* set resources */
3147 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3148 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3149 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3150 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3151 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3152 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3153 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3154 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3155 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3156 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3157 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3158 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3159 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3160 
3161 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3162 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3163 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3164 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3165 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3166 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3167 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3168 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3169 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3170 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3171 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3172 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3173 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3174 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3175 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3176 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3177 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3178 	}
3179 
3180 	r = amdgpu_ring_test_helper(kiq_ring);
3181 	if (r)
3182 		DRM_ERROR("KCQ enable failed\n");
3183 
3184 	return r;
3185 }
3186 
3187 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3188 {
3189 	struct amdgpu_device *adev = ring->adev;
3190 	struct v9_mqd *mqd = ring->mqd_ptr;
3191 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3192 	uint32_t tmp;
3193 
3194 	mqd->header = 0xC0310800;
3195 	mqd->compute_pipelinestat_enable = 0x00000001;
3196 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3197 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3198 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3199 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3200 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3201 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3202 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3203 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3204 	mqd->compute_misc_reserved = 0x00000003;
3205 
3206 	mqd->dynamic_cu_mask_addr_lo =
3207 		lower_32_bits(ring->mqd_gpu_addr
3208 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3209 	mqd->dynamic_cu_mask_addr_hi =
3210 		upper_32_bits(ring->mqd_gpu_addr
3211 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3212 
3213 	eop_base_addr = ring->eop_gpu_addr >> 8;
3214 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3215 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3216 
3217 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3218 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3219 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3220 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3221 
3222 	mqd->cp_hqd_eop_control = tmp;
3223 
3224 	/* enable doorbell? */
3225 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3226 
3227 	if (ring->use_doorbell) {
3228 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3229 				    DOORBELL_OFFSET, ring->doorbell_index);
3230 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3231 				    DOORBELL_EN, 1);
3232 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3233 				    DOORBELL_SOURCE, 0);
3234 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3235 				    DOORBELL_HIT, 0);
3236 	} else {
3237 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3238 					 DOORBELL_EN, 0);
3239 	}
3240 
3241 	mqd->cp_hqd_pq_doorbell_control = tmp;
3242 
3243 	/* disable the queue if it's active */
3244 	ring->wptr = 0;
3245 	mqd->cp_hqd_dequeue_request = 0;
3246 	mqd->cp_hqd_pq_rptr = 0;
3247 	mqd->cp_hqd_pq_wptr_lo = 0;
3248 	mqd->cp_hqd_pq_wptr_hi = 0;
3249 
3250 	/* set the pointer to the MQD */
3251 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3252 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3253 
3254 	/* set MQD vmid to 0 */
3255 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3256 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3257 	mqd->cp_mqd_control = tmp;
3258 
3259 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3260 	hqd_gpu_addr = ring->gpu_addr >> 8;
3261 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3262 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3263 
3264 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3265 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3266 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3267 			    (order_base_2(ring->ring_size / 4) - 1));
3268 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3269 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3270 #ifdef __BIG_ENDIAN
3271 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3272 #endif
3273 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3274 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3275 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3276 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3277 	mqd->cp_hqd_pq_control = tmp;
3278 
3279 	/* set the wb address whether it's enabled or not */
3280 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3281 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3282 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3283 		upper_32_bits(wb_gpu_addr) & 0xffff;
3284 
3285 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3286 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3287 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3288 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3289 
3290 	tmp = 0;
3291 	/* enable the doorbell if requested */
3292 	if (ring->use_doorbell) {
3293 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3294 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3295 				DOORBELL_OFFSET, ring->doorbell_index);
3296 
3297 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3298 					 DOORBELL_EN, 1);
3299 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3300 					 DOORBELL_SOURCE, 0);
3301 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3302 					 DOORBELL_HIT, 0);
3303 	}
3304 
3305 	mqd->cp_hqd_pq_doorbell_control = tmp;
3306 
3307 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3308 	ring->wptr = 0;
3309 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3310 
3311 	/* set the vmid for the queue */
3312 	mqd->cp_hqd_vmid = 0;
3313 
3314 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3315 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3316 	mqd->cp_hqd_persistent_state = tmp;
3317 
3318 	/* set MIN_IB_AVAIL_SIZE */
3319 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3320 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3321 	mqd->cp_hqd_ib_control = tmp;
3322 
3323 	/* activate the queue */
3324 	mqd->cp_hqd_active = 1;
3325 
3326 	return 0;
3327 }
3328 
3329 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3330 {
3331 	struct amdgpu_device *adev = ring->adev;
3332 	struct v9_mqd *mqd = ring->mqd_ptr;
3333 	int j;
3334 
3335 	/* disable wptr polling */
3336 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3337 
3338 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3339 	       mqd->cp_hqd_eop_base_addr_lo);
3340 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3341 	       mqd->cp_hqd_eop_base_addr_hi);
3342 
3343 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3344 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3345 	       mqd->cp_hqd_eop_control);
3346 
3347 	/* enable doorbell? */
3348 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3349 	       mqd->cp_hqd_pq_doorbell_control);
3350 
3351 	/* disable the queue if it's active */
3352 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3353 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3354 		for (j = 0; j < adev->usec_timeout; j++) {
3355 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3356 				break;
3357 			udelay(1);
3358 		}
3359 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3360 		       mqd->cp_hqd_dequeue_request);
3361 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3362 		       mqd->cp_hqd_pq_rptr);
3363 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3364 		       mqd->cp_hqd_pq_wptr_lo);
3365 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3366 		       mqd->cp_hqd_pq_wptr_hi);
3367 	}
3368 
3369 	/* set the pointer to the MQD */
3370 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3371 	       mqd->cp_mqd_base_addr_lo);
3372 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3373 	       mqd->cp_mqd_base_addr_hi);
3374 
3375 	/* set MQD vmid to 0 */
3376 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3377 	       mqd->cp_mqd_control);
3378 
3379 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3380 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3381 	       mqd->cp_hqd_pq_base_lo);
3382 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3383 	       mqd->cp_hqd_pq_base_hi);
3384 
3385 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3386 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3387 	       mqd->cp_hqd_pq_control);
3388 
3389 	/* set the wb address whether it's enabled or not */
3390 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3391 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3392 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3393 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3394 
3395 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3396 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3397 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3398 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3399 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3400 
3401 	/* enable the doorbell if requested */
3402 	if (ring->use_doorbell) {
3403 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3404 					(adev->doorbell_index.kiq * 2) << 2);
3405 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3406 					(adev->doorbell_index.userqueue_end * 2) << 2);
3407 	}
3408 
3409 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3410 	       mqd->cp_hqd_pq_doorbell_control);
3411 
3412 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3413 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3414 	       mqd->cp_hqd_pq_wptr_lo);
3415 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3416 	       mqd->cp_hqd_pq_wptr_hi);
3417 
3418 	/* set the vmid for the queue */
3419 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3420 
3421 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3422 	       mqd->cp_hqd_persistent_state);
3423 
3424 	/* activate the queue */
3425 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3426 	       mqd->cp_hqd_active);
3427 
3428 	if (ring->use_doorbell)
3429 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3430 
3431 	return 0;
3432 }
3433 
3434 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3435 {
3436 	struct amdgpu_device *adev = ring->adev;
3437 	int j;
3438 
3439 	/* disable the queue if it's active */
3440 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3441 
3442 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3443 
3444 		for (j = 0; j < adev->usec_timeout; j++) {
3445 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3446 				break;
3447 			udelay(1);
3448 		}
3449 
3450 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3451 			DRM_DEBUG("KIQ dequeue request failed.\n");
3452 
3453 			/* Manual disable if dequeue request times out */
3454 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3455 		}
3456 
3457 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3458 		      0);
3459 	}
3460 
3461 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3462 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3463 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3464 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3465 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3466 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3467 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3468 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3469 
3470 	return 0;
3471 }
3472 
3473 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3474 {
3475 	struct amdgpu_device *adev = ring->adev;
3476 	struct v9_mqd *mqd = ring->mqd_ptr;
3477 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3478 
3479 	gfx_v9_0_kiq_setting(ring);
3480 
3481 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3482 		/* reset MQD to a clean status */
3483 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3484 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3485 
3486 		/* reset ring buffer */
3487 		ring->wptr = 0;
3488 		amdgpu_ring_clear_ring(ring);
3489 
3490 		mutex_lock(&adev->srbm_mutex);
3491 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3492 		gfx_v9_0_kiq_init_register(ring);
3493 		soc15_grbm_select(adev, 0, 0, 0, 0);
3494 		mutex_unlock(&adev->srbm_mutex);
3495 	} else {
3496 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3497 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3498 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3499 		mutex_lock(&adev->srbm_mutex);
3500 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3501 		gfx_v9_0_mqd_init(ring);
3502 		gfx_v9_0_kiq_init_register(ring);
3503 		soc15_grbm_select(adev, 0, 0, 0, 0);
3504 		mutex_unlock(&adev->srbm_mutex);
3505 
3506 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3507 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3508 	}
3509 
3510 	return 0;
3511 }
3512 
3513 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3514 {
3515 	struct amdgpu_device *adev = ring->adev;
3516 	struct v9_mqd *mqd = ring->mqd_ptr;
3517 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3518 
3519 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3520 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3521 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3522 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3523 		mutex_lock(&adev->srbm_mutex);
3524 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3525 		gfx_v9_0_mqd_init(ring);
3526 		soc15_grbm_select(adev, 0, 0, 0, 0);
3527 		mutex_unlock(&adev->srbm_mutex);
3528 
3529 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3530 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3531 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3532 		/* reset MQD to a clean status */
3533 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3534 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3535 
3536 		/* reset ring buffer */
3537 		ring->wptr = 0;
3538 		amdgpu_ring_clear_ring(ring);
3539 	} else {
3540 		amdgpu_ring_clear_ring(ring);
3541 	}
3542 
3543 	return 0;
3544 }
3545 
3546 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3547 {
3548 	struct amdgpu_ring *ring;
3549 	int r;
3550 
3551 	ring = &adev->gfx.kiq.ring;
3552 
3553 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3554 	if (unlikely(r != 0))
3555 		return r;
3556 
3557 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3558 	if (unlikely(r != 0))
3559 		return r;
3560 
3561 	gfx_v9_0_kiq_init_queue(ring);
3562 	amdgpu_bo_kunmap(ring->mqd_obj);
3563 	ring->mqd_ptr = NULL;
3564 	amdgpu_bo_unreserve(ring->mqd_obj);
3565 	ring->sched.ready = true;
3566 	return 0;
3567 }
3568 
3569 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3570 {
3571 	struct amdgpu_ring *ring = NULL;
3572 	int r = 0, i;
3573 
3574 	gfx_v9_0_cp_compute_enable(adev, true);
3575 
3576 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3577 		ring = &adev->gfx.compute_ring[i];
3578 
3579 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3580 		if (unlikely(r != 0))
3581 			goto done;
3582 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3583 		if (!r) {
3584 			r = gfx_v9_0_kcq_init_queue(ring);
3585 			amdgpu_bo_kunmap(ring->mqd_obj);
3586 			ring->mqd_ptr = NULL;
3587 		}
3588 		amdgpu_bo_unreserve(ring->mqd_obj);
3589 		if (r)
3590 			goto done;
3591 	}
3592 
3593 	r = gfx_v9_0_kiq_kcq_enable(adev);
3594 done:
3595 	return r;
3596 }
3597 
3598 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3599 {
3600 	int r, i;
3601 	struct amdgpu_ring *ring;
3602 
3603 	if (!(adev->flags & AMD_IS_APU))
3604 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3605 
3606 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3607 		if (adev->asic_type != CHIP_ARCTURUS) {
3608 			/* legacy firmware loading */
3609 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3610 			if (r)
3611 				return r;
3612 		}
3613 
3614 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3615 		if (r)
3616 			return r;
3617 	}
3618 
3619 	r = gfx_v9_0_kiq_resume(adev);
3620 	if (r)
3621 		return r;
3622 
3623 	if (adev->asic_type != CHIP_ARCTURUS) {
3624 		r = gfx_v9_0_cp_gfx_resume(adev);
3625 		if (r)
3626 			return r;
3627 	}
3628 
3629 	r = gfx_v9_0_kcq_resume(adev);
3630 	if (r)
3631 		return r;
3632 
3633 	if (adev->asic_type != CHIP_ARCTURUS) {
3634 		ring = &adev->gfx.gfx_ring[0];
3635 		r = amdgpu_ring_test_helper(ring);
3636 		if (r)
3637 			return r;
3638 	}
3639 
3640 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3641 		ring = &adev->gfx.compute_ring[i];
3642 		amdgpu_ring_test_helper(ring);
3643 	}
3644 
3645 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3646 
3647 	return 0;
3648 }
3649 
3650 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3651 {
3652 	if (adev->asic_type != CHIP_ARCTURUS)
3653 		gfx_v9_0_cp_gfx_enable(adev, enable);
3654 	gfx_v9_0_cp_compute_enable(adev, enable);
3655 }
3656 
3657 static int gfx_v9_0_hw_init(void *handle)
3658 {
3659 	int r;
3660 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3661 
3662 	if (!amdgpu_sriov_vf(adev))
3663 		gfx_v9_0_init_golden_registers(adev);
3664 
3665 	gfx_v9_0_constants_init(adev);
3666 
3667 	r = adev->gfx.rlc.funcs->resume(adev);
3668 	if (r)
3669 		return r;
3670 
3671 	r = gfx_v9_0_cp_resume(adev);
3672 	if (r)
3673 		return r;
3674 
3675 	return r;
3676 }
3677 
3678 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3679 {
3680 	int r, i;
3681 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3682 
3683 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3684 	if (r)
3685 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3686 
3687 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3688 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3689 
3690 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3691 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3692 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3693 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3694 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3695 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3696 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3697 		amdgpu_ring_write(kiq_ring, 0);
3698 		amdgpu_ring_write(kiq_ring, 0);
3699 		amdgpu_ring_write(kiq_ring, 0);
3700 	}
3701 	r = amdgpu_ring_test_helper(kiq_ring);
3702 	if (r)
3703 		DRM_ERROR("KCQ disable failed\n");
3704 
3705 	return r;
3706 }
3707 
3708 static int gfx_v9_0_hw_fini(void *handle)
3709 {
3710 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3711 
3712 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3713 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3714 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3715 
3716 	/* DF freeze and kcq disable will fail */
3717 	if (!amdgpu_ras_intr_triggered())
3718 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3719 		gfx_v9_0_kcq_disable(adev);
3720 
3721 	if (amdgpu_sriov_vf(adev)) {
3722 		gfx_v9_0_cp_gfx_enable(adev, false);
3723 		/* must disable polling for SRIOV when hw finished, otherwise
3724 		 * CPC engine may still keep fetching WB address which is already
3725 		 * invalid after sw finished and trigger DMAR reading error in
3726 		 * hypervisor side.
3727 		 */
3728 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3729 		return 0;
3730 	}
3731 
3732 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3733 	 * otherwise KIQ is hanging when binding back
3734 	 */
3735 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3736 		mutex_lock(&adev->srbm_mutex);
3737 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3738 				adev->gfx.kiq.ring.pipe,
3739 				adev->gfx.kiq.ring.queue, 0);
3740 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3741 		soc15_grbm_select(adev, 0, 0, 0, 0);
3742 		mutex_unlock(&adev->srbm_mutex);
3743 	}
3744 
3745 	gfx_v9_0_cp_enable(adev, false);
3746 	adev->gfx.rlc.funcs->stop(adev);
3747 
3748 	return 0;
3749 }
3750 
3751 static int gfx_v9_0_suspend(void *handle)
3752 {
3753 	return gfx_v9_0_hw_fini(handle);
3754 }
3755 
3756 static int gfx_v9_0_resume(void *handle)
3757 {
3758 	return gfx_v9_0_hw_init(handle);
3759 }
3760 
3761 static bool gfx_v9_0_is_idle(void *handle)
3762 {
3763 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3764 
3765 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3766 				GRBM_STATUS, GUI_ACTIVE))
3767 		return false;
3768 	else
3769 		return true;
3770 }
3771 
3772 static int gfx_v9_0_wait_for_idle(void *handle)
3773 {
3774 	unsigned i;
3775 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3776 
3777 	for (i = 0; i < adev->usec_timeout; i++) {
3778 		if (gfx_v9_0_is_idle(handle))
3779 			return 0;
3780 		udelay(1);
3781 	}
3782 	return -ETIMEDOUT;
3783 }
3784 
3785 static int gfx_v9_0_soft_reset(void *handle)
3786 {
3787 	u32 grbm_soft_reset = 0;
3788 	u32 tmp;
3789 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3790 
3791 	/* GRBM_STATUS */
3792 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3793 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3794 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3795 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3796 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3797 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3798 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3799 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3800 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3801 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3802 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3803 	}
3804 
3805 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3806 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3807 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3808 	}
3809 
3810 	/* GRBM_STATUS2 */
3811 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3812 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3813 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3814 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3815 
3816 
3817 	if (grbm_soft_reset) {
3818 		/* stop the rlc */
3819 		adev->gfx.rlc.funcs->stop(adev);
3820 
3821 		if (adev->asic_type != CHIP_ARCTURUS)
3822 			/* Disable GFX parsing/prefetching */
3823 			gfx_v9_0_cp_gfx_enable(adev, false);
3824 
3825 		/* Disable MEC parsing/prefetching */
3826 		gfx_v9_0_cp_compute_enable(adev, false);
3827 
3828 		if (grbm_soft_reset) {
3829 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3830 			tmp |= grbm_soft_reset;
3831 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3832 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3833 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3834 
3835 			udelay(50);
3836 
3837 			tmp &= ~grbm_soft_reset;
3838 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3839 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3840 		}
3841 
3842 		/* Wait a little for things to settle down */
3843 		udelay(50);
3844 	}
3845 	return 0;
3846 }
3847 
3848 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3849 {
3850 	uint64_t clock;
3851 
3852 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3853 	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3854 		uint32_t tmp, lsb, msb, i = 0;
3855 		do {
3856 			if (i != 0)
3857 				udelay(1);
3858 			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3859 			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3860 			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3861 			i++;
3862 		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3863 		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3864 	} else {
3865 		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3866 		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3867 			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3868 	}
3869 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3870 	return clock;
3871 }
3872 
3873 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3874 					  uint32_t vmid,
3875 					  uint32_t gds_base, uint32_t gds_size,
3876 					  uint32_t gws_base, uint32_t gws_size,
3877 					  uint32_t oa_base, uint32_t oa_size)
3878 {
3879 	struct amdgpu_device *adev = ring->adev;
3880 
3881 	/* GDS Base */
3882 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3883 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3884 				   gds_base);
3885 
3886 	/* GDS Size */
3887 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3888 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3889 				   gds_size);
3890 
3891 	/* GWS */
3892 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3893 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3894 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3895 
3896 	/* OA */
3897 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3898 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3899 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3900 }
3901 
3902 static const u32 vgpr_init_compute_shader[] =
3903 {
3904 	0xb07c0000, 0xbe8000ff,
3905 	0x000000f8, 0xbf110800,
3906 	0x7e000280, 0x7e020280,
3907 	0x7e040280, 0x7e060280,
3908 	0x7e080280, 0x7e0a0280,
3909 	0x7e0c0280, 0x7e0e0280,
3910 	0x80808800, 0xbe803200,
3911 	0xbf84fff5, 0xbf9c0000,
3912 	0xd28c0001, 0x0001007f,
3913 	0xd28d0001, 0x0002027e,
3914 	0x10020288, 0xb8810904,
3915 	0xb7814000, 0xd1196a01,
3916 	0x00000301, 0xbe800087,
3917 	0xbefc00c1, 0xd89c4000,
3918 	0x00020201, 0xd89cc080,
3919 	0x00040401, 0x320202ff,
3920 	0x00000800, 0x80808100,
3921 	0xbf84fff8, 0x7e020280,
3922 	0xbf810000, 0x00000000,
3923 };
3924 
3925 static const u32 sgpr_init_compute_shader[] =
3926 {
3927 	0xb07c0000, 0xbe8000ff,
3928 	0x0000005f, 0xbee50080,
3929 	0xbe812c65, 0xbe822c65,
3930 	0xbe832c65, 0xbe842c65,
3931 	0xbe852c65, 0xb77c0005,
3932 	0x80808500, 0xbf84fff8,
3933 	0xbe800080, 0xbf810000,
3934 };
3935 
3936 static const struct soc15_reg_entry vgpr_init_regs[] = {
3937    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3938    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3939    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3940    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3941    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3942    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3943    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
3944    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3945    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
3946    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3947 };
3948 
3949 static const struct soc15_reg_entry sgpr1_init_regs[] = {
3950    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
3951    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
3952    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
3953    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
3954    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3955    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3956    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
3957    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3958    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
3959    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3960 };
3961 
3962 static const struct soc15_reg_entry sgpr2_init_regs[] = {
3963    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
3964    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
3965    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
3966    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
3967    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3968    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3969    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
3970    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3971    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
3972    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3973 };
3974 
3975 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3976    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3977    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3978    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3979    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3980    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3981    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3982    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3983    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3984    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3985    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3986    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3987    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3988    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3989    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3990    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3991    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3992    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3993    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3994    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3995    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3996    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
3997    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3998    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3999    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4000    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4001    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4002    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4003    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4004    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4005    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4006    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4007    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4008    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4009    { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1},
4010    { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1},
4011    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4012 };
4013 
4014 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4015 {
4016 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4017 	int i, r;
4018 
4019 	/* only support when RAS is enabled */
4020 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4021 		return 0;
4022 
4023 	r = amdgpu_ring_alloc(ring, 7);
4024 	if (r) {
4025 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4026 			ring->name, r);
4027 		return r;
4028 	}
4029 
4030 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4031 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4032 
4033 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4034 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4035 				PACKET3_DMA_DATA_DST_SEL(1) |
4036 				PACKET3_DMA_DATA_SRC_SEL(2) |
4037 				PACKET3_DMA_DATA_ENGINE(0)));
4038 	amdgpu_ring_write(ring, 0);
4039 	amdgpu_ring_write(ring, 0);
4040 	amdgpu_ring_write(ring, 0);
4041 	amdgpu_ring_write(ring, 0);
4042 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4043 				adev->gds.gds_size);
4044 
4045 	amdgpu_ring_commit(ring);
4046 
4047 	for (i = 0; i < adev->usec_timeout; i++) {
4048 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4049 			break;
4050 		udelay(1);
4051 	}
4052 
4053 	if (i >= adev->usec_timeout)
4054 		r = -ETIMEDOUT;
4055 
4056 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4057 
4058 	return r;
4059 }
4060 
4061 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4062 {
4063 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4064 	struct amdgpu_ib ib;
4065 	struct dma_fence *f = NULL;
4066 	int r, i, j, k;
4067 	unsigned total_size, vgpr_offset, sgpr_offset;
4068 	u64 gpu_addr;
4069 
4070 	/* only support when RAS is enabled */
4071 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4072 		return 0;
4073 
4074 	/* bail if the compute ring is not ready */
4075 	if (!ring->sched.ready)
4076 		return 0;
4077 
4078 	total_size =
4079 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4080 	total_size +=
4081 		((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4;
4082 	total_size +=
4083 		((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4;
4084 	total_size = ALIGN(total_size, 256);
4085 	vgpr_offset = total_size;
4086 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4087 	sgpr_offset = total_size;
4088 	total_size += sizeof(sgpr_init_compute_shader);
4089 
4090 	/* allocate an indirect buffer to put the commands in */
4091 	memset(&ib, 0, sizeof(ib));
4092 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4093 	if (r) {
4094 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4095 		return r;
4096 	}
4097 
4098 	/* load the compute shaders */
4099 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4100 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4101 
4102 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4103 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4104 
4105 	/* init the ib length to 0 */
4106 	ib.length_dw = 0;
4107 
4108 	/* VGPR */
4109 	/* write the register state for the compute dispatch */
4110 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4111 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4112 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4113 								- PACKET3_SET_SH_REG_START;
4114 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4115 	}
4116 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4117 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4118 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4119 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4120 							- PACKET3_SET_SH_REG_START;
4121 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4122 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4123 
4124 	/* write dispatch packet */
4125 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4126 	ib.ptr[ib.length_dw++] = 0x40*2; /* x */
4127 	ib.ptr[ib.length_dw++] = 1; /* y */
4128 	ib.ptr[ib.length_dw++] = 1; /* z */
4129 	ib.ptr[ib.length_dw++] =
4130 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4131 
4132 	/* write CS partial flush packet */
4133 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4134 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4135 
4136 	/* SGPR1 */
4137 	/* write the register state for the compute dispatch */
4138 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i++) {
4139 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4140 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4141 								- PACKET3_SET_SH_REG_START;
4142 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4143 	}
4144 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4145 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4146 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4147 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4148 							- PACKET3_SET_SH_REG_START;
4149 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4150 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4151 
4152 	/* write dispatch packet */
4153 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4154 	ib.ptr[ib.length_dw++] = 0xA0*2; /* x */
4155 	ib.ptr[ib.length_dw++] = 1; /* y */
4156 	ib.ptr[ib.length_dw++] = 1; /* z */
4157 	ib.ptr[ib.length_dw++] =
4158 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4159 
4160 	/* write CS partial flush packet */
4161 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4162 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4163 
4164 	/* SGPR2 */
4165 	/* write the register state for the compute dispatch */
4166 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i++) {
4167 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4168 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4169 								- PACKET3_SET_SH_REG_START;
4170 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4171 	}
4172 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4173 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4174 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4175 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4176 							- PACKET3_SET_SH_REG_START;
4177 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4178 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4179 
4180 	/* write dispatch packet */
4181 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4182 	ib.ptr[ib.length_dw++] = 0xA0*2; /* x */
4183 	ib.ptr[ib.length_dw++] = 1; /* y */
4184 	ib.ptr[ib.length_dw++] = 1; /* z */
4185 	ib.ptr[ib.length_dw++] =
4186 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4187 
4188 	/* write CS partial flush packet */
4189 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4190 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4191 
4192 	/* shedule the ib on the ring */
4193 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4194 	if (r) {
4195 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4196 		goto fail;
4197 	}
4198 
4199 	/* wait for the GPU to finish processing the IB */
4200 	r = dma_fence_wait(f, false);
4201 	if (r) {
4202 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4203 		goto fail;
4204 	}
4205 
4206 	/* read back registers to clear the counters */
4207 	mutex_lock(&adev->grbm_idx_mutex);
4208 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4209 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4210 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4211 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4212 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4213 			}
4214 		}
4215 	}
4216 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4217 	mutex_unlock(&adev->grbm_idx_mutex);
4218 
4219 fail:
4220 	amdgpu_ib_free(adev, &ib, NULL);
4221 	dma_fence_put(f);
4222 
4223 	return r;
4224 }
4225 
4226 static int gfx_v9_0_early_init(void *handle)
4227 {
4228 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4229 
4230 	if (adev->asic_type == CHIP_ARCTURUS)
4231 		adev->gfx.num_gfx_rings = 0;
4232 	else
4233 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4234 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4235 	gfx_v9_0_set_ring_funcs(adev);
4236 	gfx_v9_0_set_irq_funcs(adev);
4237 	gfx_v9_0_set_gds_init(adev);
4238 	gfx_v9_0_set_rlc_funcs(adev);
4239 
4240 	return 0;
4241 }
4242 
4243 static int gfx_v9_0_ecc_late_init(void *handle)
4244 {
4245 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4246 	int r;
4247 
4248 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4249 	if (r)
4250 		return r;
4251 
4252 	/* requires IBs so do in late init after IB pool is initialized */
4253 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4254 	if (r)
4255 		return r;
4256 
4257 	r = amdgpu_gfx_ras_late_init(adev);
4258 	if (r)
4259 		return r;
4260 
4261 	return 0;
4262 }
4263 
4264 static int gfx_v9_0_late_init(void *handle)
4265 {
4266 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4267 	int r;
4268 
4269 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4270 	if (r)
4271 		return r;
4272 
4273 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4274 	if (r)
4275 		return r;
4276 
4277 	r = gfx_v9_0_ecc_late_init(handle);
4278 	if (r)
4279 		return r;
4280 
4281 	return 0;
4282 }
4283 
4284 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4285 {
4286 	uint32_t rlc_setting;
4287 
4288 	/* if RLC is not enabled, do nothing */
4289 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4290 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4291 		return false;
4292 
4293 	return true;
4294 }
4295 
4296 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4297 {
4298 	uint32_t data;
4299 	unsigned i;
4300 
4301 	data = RLC_SAFE_MODE__CMD_MASK;
4302 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4303 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4304 
4305 	/* wait for RLC_SAFE_MODE */
4306 	for (i = 0; i < adev->usec_timeout; i++) {
4307 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4308 			break;
4309 		udelay(1);
4310 	}
4311 }
4312 
4313 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4314 {
4315 	uint32_t data;
4316 
4317 	data = RLC_SAFE_MODE__CMD_MASK;
4318 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4319 }
4320 
4321 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4322 						bool enable)
4323 {
4324 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4325 
4326 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4327 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4328 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4329 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4330 	} else {
4331 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4332 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4333 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4334 	}
4335 
4336 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4337 }
4338 
4339 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4340 						bool enable)
4341 {
4342 	/* TODO: double check if we need to perform under safe mode */
4343 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4344 
4345 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4346 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4347 	else
4348 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4349 
4350 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4351 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4352 	else
4353 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4354 
4355 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4356 }
4357 
4358 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4359 						      bool enable)
4360 {
4361 	uint32_t data, def;
4362 
4363 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4364 
4365 	/* It is disabled by HW by default */
4366 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4367 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4368 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4369 
4370 		if (adev->asic_type != CHIP_VEGA12)
4371 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4372 
4373 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4374 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4375 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4376 
4377 		/* only for Vega10 & Raven1 */
4378 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4379 
4380 		if (def != data)
4381 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4382 
4383 		/* MGLS is a global flag to control all MGLS in GFX */
4384 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4385 			/* 2 - RLC memory Light sleep */
4386 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4387 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4388 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4389 				if (def != data)
4390 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4391 			}
4392 			/* 3 - CP memory Light sleep */
4393 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4394 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4395 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4396 				if (def != data)
4397 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4398 			}
4399 		}
4400 	} else {
4401 		/* 1 - MGCG_OVERRIDE */
4402 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4403 
4404 		if (adev->asic_type != CHIP_VEGA12)
4405 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4406 
4407 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4408 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4409 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4410 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4411 
4412 		if (def != data)
4413 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4414 
4415 		/* 2 - disable MGLS in RLC */
4416 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4417 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4418 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4419 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4420 		}
4421 
4422 		/* 3 - disable MGLS in CP */
4423 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4424 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4425 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4426 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4427 		}
4428 	}
4429 
4430 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4431 }
4432 
4433 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4434 					   bool enable)
4435 {
4436 	uint32_t data, def;
4437 
4438 	if (adev->asic_type == CHIP_ARCTURUS)
4439 		return;
4440 
4441 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4442 
4443 	/* Enable 3D CGCG/CGLS */
4444 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4445 		/* write cmd to clear cgcg/cgls ov */
4446 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4447 		/* unset CGCG override */
4448 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4449 		/* update CGCG and CGLS override bits */
4450 		if (def != data)
4451 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4452 
4453 		/* enable 3Dcgcg FSM(0x0000363f) */
4454 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4455 
4456 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4457 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4458 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4459 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4460 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4461 		if (def != data)
4462 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4463 
4464 		/* set IDLE_POLL_COUNT(0x00900100) */
4465 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4466 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4467 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4468 		if (def != data)
4469 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4470 	} else {
4471 		/* Disable CGCG/CGLS */
4472 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4473 		/* disable cgcg, cgls should be disabled */
4474 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4475 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4476 		/* disable cgcg and cgls in FSM */
4477 		if (def != data)
4478 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4479 	}
4480 
4481 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4482 }
4483 
4484 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4485 						      bool enable)
4486 {
4487 	uint32_t def, data;
4488 
4489 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4490 
4491 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4492 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4493 		/* unset CGCG override */
4494 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4495 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4496 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4497 		else
4498 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4499 		/* update CGCG and CGLS override bits */
4500 		if (def != data)
4501 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4502 
4503 		/* enable cgcg FSM(0x0000363F) */
4504 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4505 
4506 		if (adev->asic_type == CHIP_ARCTURUS)
4507 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4508 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4509 		else
4510 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4511 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4512 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4513 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4514 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4515 		if (def != data)
4516 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4517 
4518 		/* set IDLE_POLL_COUNT(0x00900100) */
4519 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4520 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4521 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4522 		if (def != data)
4523 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4524 	} else {
4525 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4526 		/* reset CGCG/CGLS bits */
4527 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4528 		/* disable cgcg and cgls in FSM */
4529 		if (def != data)
4530 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4531 	}
4532 
4533 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4534 }
4535 
4536 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4537 					    bool enable)
4538 {
4539 	if (enable) {
4540 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4541 		 * ===  MGCG + MGLS ===
4542 		 */
4543 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4544 		/* ===  CGCG /CGLS for GFX 3D Only === */
4545 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4546 		/* ===  CGCG + CGLS === */
4547 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4548 	} else {
4549 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4550 		 * ===  CGCG + CGLS ===
4551 		 */
4552 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4553 		/* ===  CGCG /CGLS for GFX 3D Only === */
4554 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4555 		/* ===  MGCG + MGLS === */
4556 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4557 	}
4558 	return 0;
4559 }
4560 
4561 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4562 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4563 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4564 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4565 	.init = gfx_v9_0_rlc_init,
4566 	.get_csb_size = gfx_v9_0_get_csb_size,
4567 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4568 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4569 	.resume = gfx_v9_0_rlc_resume,
4570 	.stop = gfx_v9_0_rlc_stop,
4571 	.reset = gfx_v9_0_rlc_reset,
4572 	.start = gfx_v9_0_rlc_start
4573 };
4574 
4575 static int gfx_v9_0_set_powergating_state(void *handle,
4576 					  enum amd_powergating_state state)
4577 {
4578 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4579 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4580 
4581 	switch (adev->asic_type) {
4582 	case CHIP_RAVEN:
4583 	case CHIP_RENOIR:
4584 		if (!enable) {
4585 			amdgpu_gfx_off_ctrl(adev, false);
4586 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4587 		}
4588 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4589 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4590 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4591 		} else {
4592 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4593 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4594 		}
4595 
4596 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4597 			gfx_v9_0_enable_cp_power_gating(adev, true);
4598 		else
4599 			gfx_v9_0_enable_cp_power_gating(adev, false);
4600 
4601 		/* update gfx cgpg state */
4602 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4603 
4604 		/* update mgcg state */
4605 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4606 
4607 		if (enable)
4608 			amdgpu_gfx_off_ctrl(adev, true);
4609 		break;
4610 	case CHIP_VEGA12:
4611 		if (!enable) {
4612 			amdgpu_gfx_off_ctrl(adev, false);
4613 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4614 		} else {
4615 			amdgpu_gfx_off_ctrl(adev, true);
4616 		}
4617 		break;
4618 	default:
4619 		break;
4620 	}
4621 
4622 	return 0;
4623 }
4624 
4625 static int gfx_v9_0_set_clockgating_state(void *handle,
4626 					  enum amd_clockgating_state state)
4627 {
4628 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4629 
4630 	if (amdgpu_sriov_vf(adev))
4631 		return 0;
4632 
4633 	switch (adev->asic_type) {
4634 	case CHIP_VEGA10:
4635 	case CHIP_VEGA12:
4636 	case CHIP_VEGA20:
4637 	case CHIP_RAVEN:
4638 	case CHIP_ARCTURUS:
4639 	case CHIP_RENOIR:
4640 		gfx_v9_0_update_gfx_clock_gating(adev,
4641 						 state == AMD_CG_STATE_GATE ? true : false);
4642 		break;
4643 	default:
4644 		break;
4645 	}
4646 	return 0;
4647 }
4648 
4649 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4650 {
4651 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4652 	int data;
4653 
4654 	if (amdgpu_sriov_vf(adev))
4655 		*flags = 0;
4656 
4657 	/* AMD_CG_SUPPORT_GFX_MGCG */
4658 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4659 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4660 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4661 
4662 	/* AMD_CG_SUPPORT_GFX_CGCG */
4663 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4664 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4665 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4666 
4667 	/* AMD_CG_SUPPORT_GFX_CGLS */
4668 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4669 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4670 
4671 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4672 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4673 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4674 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4675 
4676 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4677 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4678 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4679 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4680 
4681 	if (adev->asic_type != CHIP_ARCTURUS) {
4682 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4683 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4684 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4685 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4686 
4687 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4688 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4689 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4690 	}
4691 }
4692 
4693 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4694 {
4695 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4696 }
4697 
4698 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4699 {
4700 	struct amdgpu_device *adev = ring->adev;
4701 	u64 wptr;
4702 
4703 	/* XXX check if swapping is necessary on BE */
4704 	if (ring->use_doorbell) {
4705 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4706 	} else {
4707 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4708 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4709 	}
4710 
4711 	return wptr;
4712 }
4713 
4714 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4715 {
4716 	struct amdgpu_device *adev = ring->adev;
4717 
4718 	if (ring->use_doorbell) {
4719 		/* XXX check if swapping is necessary on BE */
4720 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4721 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4722 	} else {
4723 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4724 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4725 	}
4726 }
4727 
4728 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4729 {
4730 	struct amdgpu_device *adev = ring->adev;
4731 	u32 ref_and_mask, reg_mem_engine;
4732 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4733 
4734 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4735 		switch (ring->me) {
4736 		case 1:
4737 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4738 			break;
4739 		case 2:
4740 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4741 			break;
4742 		default:
4743 			return;
4744 		}
4745 		reg_mem_engine = 0;
4746 	} else {
4747 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4748 		reg_mem_engine = 1; /* pfp */
4749 	}
4750 
4751 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4752 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4753 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4754 			      ref_and_mask, ref_and_mask, 0x20);
4755 }
4756 
4757 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4758 					struct amdgpu_job *job,
4759 					struct amdgpu_ib *ib,
4760 					uint32_t flags)
4761 {
4762 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4763 	u32 header, control = 0;
4764 
4765 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4766 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4767 	else
4768 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4769 
4770 	control |= ib->length_dw | (vmid << 24);
4771 
4772 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4773 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4774 
4775 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4776 			gfx_v9_0_ring_emit_de_meta(ring);
4777 	}
4778 
4779 	amdgpu_ring_write(ring, header);
4780 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4781 	amdgpu_ring_write(ring,
4782 #ifdef __BIG_ENDIAN
4783 		(2 << 0) |
4784 #endif
4785 		lower_32_bits(ib->gpu_addr));
4786 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4787 	amdgpu_ring_write(ring, control);
4788 }
4789 
4790 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4791 					  struct amdgpu_job *job,
4792 					  struct amdgpu_ib *ib,
4793 					  uint32_t flags)
4794 {
4795 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4796 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4797 
4798 	/* Currently, there is a high possibility to get wave ID mismatch
4799 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4800 	 * different wave IDs than the GDS expects. This situation happens
4801 	 * randomly when at least 5 compute pipes use GDS ordered append.
4802 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4803 	 * Those are probably bugs somewhere else in the kernel driver.
4804 	 *
4805 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4806 	 * GDS to 0 for this ring (me/pipe).
4807 	 */
4808 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4809 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4810 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4811 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4812 	}
4813 
4814 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4815 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4816 	amdgpu_ring_write(ring,
4817 #ifdef __BIG_ENDIAN
4818 				(2 << 0) |
4819 #endif
4820 				lower_32_bits(ib->gpu_addr));
4821 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4822 	amdgpu_ring_write(ring, control);
4823 }
4824 
4825 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4826 				     u64 seq, unsigned flags)
4827 {
4828 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4829 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4830 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4831 
4832 	/* RELEASE_MEM - flush caches, send int */
4833 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4834 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4835 					       EOP_TC_NC_ACTION_EN) :
4836 					      (EOP_TCL1_ACTION_EN |
4837 					       EOP_TC_ACTION_EN |
4838 					       EOP_TC_WB_ACTION_EN |
4839 					       EOP_TC_MD_ACTION_EN)) |
4840 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4841 				 EVENT_INDEX(5)));
4842 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4843 
4844 	/*
4845 	 * the address should be Qword aligned if 64bit write, Dword
4846 	 * aligned if only send 32bit data low (discard data high)
4847 	 */
4848 	if (write64bit)
4849 		BUG_ON(addr & 0x7);
4850 	else
4851 		BUG_ON(addr & 0x3);
4852 	amdgpu_ring_write(ring, lower_32_bits(addr));
4853 	amdgpu_ring_write(ring, upper_32_bits(addr));
4854 	amdgpu_ring_write(ring, lower_32_bits(seq));
4855 	amdgpu_ring_write(ring, upper_32_bits(seq));
4856 	amdgpu_ring_write(ring, 0);
4857 }
4858 
4859 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4860 {
4861 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4862 	uint32_t seq = ring->fence_drv.sync_seq;
4863 	uint64_t addr = ring->fence_drv.gpu_addr;
4864 
4865 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4866 			      lower_32_bits(addr), upper_32_bits(addr),
4867 			      seq, 0xffffffff, 4);
4868 }
4869 
4870 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4871 					unsigned vmid, uint64_t pd_addr)
4872 {
4873 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4874 
4875 	/* compute doesn't have PFP */
4876 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4877 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4878 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4879 		amdgpu_ring_write(ring, 0x0);
4880 	}
4881 }
4882 
4883 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4884 {
4885 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4886 }
4887 
4888 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4889 {
4890 	u64 wptr;
4891 
4892 	/* XXX check if swapping is necessary on BE */
4893 	if (ring->use_doorbell)
4894 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4895 	else
4896 		BUG();
4897 	return wptr;
4898 }
4899 
4900 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4901 					   bool acquire)
4902 {
4903 	struct amdgpu_device *adev = ring->adev;
4904 	int pipe_num, tmp, reg;
4905 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4906 
4907 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4908 
4909 	/* first me only has 2 entries, GFX and HP3D */
4910 	if (ring->me > 0)
4911 		pipe_num -= 2;
4912 
4913 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4914 	tmp = RREG32(reg);
4915 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4916 	WREG32(reg, tmp);
4917 }
4918 
4919 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4920 					    struct amdgpu_ring *ring,
4921 					    bool acquire)
4922 {
4923 	int i, pipe;
4924 	bool reserve;
4925 	struct amdgpu_ring *iring;
4926 
4927 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4928 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4929 	if (acquire)
4930 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4931 	else
4932 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4933 
4934 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4935 		/* Clear all reservations - everyone reacquires all resources */
4936 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4937 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4938 						       true);
4939 
4940 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4941 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4942 						       true);
4943 	} else {
4944 		/* Lower all pipes without a current reservation */
4945 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4946 			iring = &adev->gfx.gfx_ring[i];
4947 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4948 							   iring->me,
4949 							   iring->pipe,
4950 							   0);
4951 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4952 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4953 		}
4954 
4955 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4956 			iring = &adev->gfx.compute_ring[i];
4957 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4958 							   iring->me,
4959 							   iring->pipe,
4960 							   0);
4961 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4962 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4963 		}
4964 	}
4965 
4966 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4967 }
4968 
4969 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4970 				      struct amdgpu_ring *ring,
4971 				      bool acquire)
4972 {
4973 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4974 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4975 
4976 	mutex_lock(&adev->srbm_mutex);
4977 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4978 
4979 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4980 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4981 
4982 	soc15_grbm_select(adev, 0, 0, 0, 0);
4983 	mutex_unlock(&adev->srbm_mutex);
4984 }
4985 
4986 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4987 					       enum drm_sched_priority priority)
4988 {
4989 	struct amdgpu_device *adev = ring->adev;
4990 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4991 
4992 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4993 		return;
4994 
4995 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4996 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4997 }
4998 
4999 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5000 {
5001 	struct amdgpu_device *adev = ring->adev;
5002 
5003 	/* XXX check if swapping is necessary on BE */
5004 	if (ring->use_doorbell) {
5005 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5006 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5007 	} else{
5008 		BUG(); /* only DOORBELL method supported on gfx9 now */
5009 	}
5010 }
5011 
5012 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5013 					 u64 seq, unsigned int flags)
5014 {
5015 	struct amdgpu_device *adev = ring->adev;
5016 
5017 	/* we only allocate 32bit for each seq wb address */
5018 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5019 
5020 	/* write fence seq to the "addr" */
5021 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5022 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5023 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5024 	amdgpu_ring_write(ring, lower_32_bits(addr));
5025 	amdgpu_ring_write(ring, upper_32_bits(addr));
5026 	amdgpu_ring_write(ring, lower_32_bits(seq));
5027 
5028 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5029 		/* set register to trigger INT */
5030 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5031 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5032 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5033 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5034 		amdgpu_ring_write(ring, 0);
5035 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5036 	}
5037 }
5038 
5039 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5040 {
5041 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5042 	amdgpu_ring_write(ring, 0);
5043 }
5044 
5045 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5046 {
5047 	struct v9_ce_ib_state ce_payload = {0};
5048 	uint64_t csa_addr;
5049 	int cnt;
5050 
5051 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5052 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5053 
5054 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5055 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5056 				 WRITE_DATA_DST_SEL(8) |
5057 				 WR_CONFIRM) |
5058 				 WRITE_DATA_CACHE_POLICY(0));
5059 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5060 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5061 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5062 }
5063 
5064 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5065 {
5066 	struct v9_de_ib_state de_payload = {0};
5067 	uint64_t csa_addr, gds_addr;
5068 	int cnt;
5069 
5070 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5071 	gds_addr = csa_addr + 4096;
5072 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5073 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5074 
5075 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5076 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5077 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5078 				 WRITE_DATA_DST_SEL(8) |
5079 				 WR_CONFIRM) |
5080 				 WRITE_DATA_CACHE_POLICY(0));
5081 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5082 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5083 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5084 }
5085 
5086 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5087 {
5088 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5089 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5090 }
5091 
5092 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5093 {
5094 	uint32_t dw2 = 0;
5095 
5096 	if (amdgpu_sriov_vf(ring->adev))
5097 		gfx_v9_0_ring_emit_ce_meta(ring);
5098 
5099 	gfx_v9_0_ring_emit_tmz(ring, true);
5100 
5101 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5102 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5103 		/* set load_global_config & load_global_uconfig */
5104 		dw2 |= 0x8001;
5105 		/* set load_cs_sh_regs */
5106 		dw2 |= 0x01000000;
5107 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5108 		dw2 |= 0x10002;
5109 
5110 		/* set load_ce_ram if preamble presented */
5111 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5112 			dw2 |= 0x10000000;
5113 	} else {
5114 		/* still load_ce_ram if this is the first time preamble presented
5115 		 * although there is no context switch happens.
5116 		 */
5117 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5118 			dw2 |= 0x10000000;
5119 	}
5120 
5121 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5122 	amdgpu_ring_write(ring, dw2);
5123 	amdgpu_ring_write(ring, 0);
5124 }
5125 
5126 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5127 {
5128 	unsigned ret;
5129 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5130 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5131 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5132 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5133 	ret = ring->wptr & ring->buf_mask;
5134 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5135 	return ret;
5136 }
5137 
5138 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5139 {
5140 	unsigned cur;
5141 	BUG_ON(offset > ring->buf_mask);
5142 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5143 
5144 	cur = (ring->wptr & ring->buf_mask) - 1;
5145 	if (likely(cur > offset))
5146 		ring->ring[offset] = cur - offset;
5147 	else
5148 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5149 }
5150 
5151 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5152 {
5153 	struct amdgpu_device *adev = ring->adev;
5154 
5155 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5156 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5157 				(5 << 8) |	/* dst: memory */
5158 				(1 << 20));	/* write confirm */
5159 	amdgpu_ring_write(ring, reg);
5160 	amdgpu_ring_write(ring, 0);
5161 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5162 				adev->virt.reg_val_offs * 4));
5163 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5164 				adev->virt.reg_val_offs * 4));
5165 }
5166 
5167 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5168 				    uint32_t val)
5169 {
5170 	uint32_t cmd = 0;
5171 
5172 	switch (ring->funcs->type) {
5173 	case AMDGPU_RING_TYPE_GFX:
5174 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5175 		break;
5176 	case AMDGPU_RING_TYPE_KIQ:
5177 		cmd = (1 << 16); /* no inc addr */
5178 		break;
5179 	default:
5180 		cmd = WR_CONFIRM;
5181 		break;
5182 	}
5183 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5184 	amdgpu_ring_write(ring, cmd);
5185 	amdgpu_ring_write(ring, reg);
5186 	amdgpu_ring_write(ring, 0);
5187 	amdgpu_ring_write(ring, val);
5188 }
5189 
5190 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5191 					uint32_t val, uint32_t mask)
5192 {
5193 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5194 }
5195 
5196 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5197 						  uint32_t reg0, uint32_t reg1,
5198 						  uint32_t ref, uint32_t mask)
5199 {
5200 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5201 	struct amdgpu_device *adev = ring->adev;
5202 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5203 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5204 
5205 	if (fw_version_ok)
5206 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5207 				      ref, mask, 0x20);
5208 	else
5209 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5210 							   ref, mask);
5211 }
5212 
5213 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5214 {
5215 	struct amdgpu_device *adev = ring->adev;
5216 	uint32_t value = 0;
5217 
5218 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5219 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5220 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5221 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5222 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5223 }
5224 
5225 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5226 						 enum amdgpu_interrupt_state state)
5227 {
5228 	switch (state) {
5229 	case AMDGPU_IRQ_STATE_DISABLE:
5230 	case AMDGPU_IRQ_STATE_ENABLE:
5231 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5232 			       TIME_STAMP_INT_ENABLE,
5233 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5234 		break;
5235 	default:
5236 		break;
5237 	}
5238 }
5239 
5240 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5241 						     int me, int pipe,
5242 						     enum amdgpu_interrupt_state state)
5243 {
5244 	u32 mec_int_cntl, mec_int_cntl_reg;
5245 
5246 	/*
5247 	 * amdgpu controls only the first MEC. That's why this function only
5248 	 * handles the setting of interrupts for this specific MEC. All other
5249 	 * pipes' interrupts are set by amdkfd.
5250 	 */
5251 
5252 	if (me == 1) {
5253 		switch (pipe) {
5254 		case 0:
5255 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5256 			break;
5257 		case 1:
5258 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5259 			break;
5260 		case 2:
5261 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5262 			break;
5263 		case 3:
5264 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5265 			break;
5266 		default:
5267 			DRM_DEBUG("invalid pipe %d\n", pipe);
5268 			return;
5269 		}
5270 	} else {
5271 		DRM_DEBUG("invalid me %d\n", me);
5272 		return;
5273 	}
5274 
5275 	switch (state) {
5276 	case AMDGPU_IRQ_STATE_DISABLE:
5277 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5278 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5279 					     TIME_STAMP_INT_ENABLE, 0);
5280 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5281 		break;
5282 	case AMDGPU_IRQ_STATE_ENABLE:
5283 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5284 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5285 					     TIME_STAMP_INT_ENABLE, 1);
5286 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5287 		break;
5288 	default:
5289 		break;
5290 	}
5291 }
5292 
5293 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5294 					     struct amdgpu_irq_src *source,
5295 					     unsigned type,
5296 					     enum amdgpu_interrupt_state state)
5297 {
5298 	switch (state) {
5299 	case AMDGPU_IRQ_STATE_DISABLE:
5300 	case AMDGPU_IRQ_STATE_ENABLE:
5301 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5302 			       PRIV_REG_INT_ENABLE,
5303 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5304 		break;
5305 	default:
5306 		break;
5307 	}
5308 
5309 	return 0;
5310 }
5311 
5312 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5313 					      struct amdgpu_irq_src *source,
5314 					      unsigned type,
5315 					      enum amdgpu_interrupt_state state)
5316 {
5317 	switch (state) {
5318 	case AMDGPU_IRQ_STATE_DISABLE:
5319 	case AMDGPU_IRQ_STATE_ENABLE:
5320 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5321 			       PRIV_INSTR_INT_ENABLE,
5322 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5323 	default:
5324 		break;
5325 	}
5326 
5327 	return 0;
5328 }
5329 
5330 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5331 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5332 			CP_ECC_ERROR_INT_ENABLE, 1)
5333 
5334 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5335 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5336 			CP_ECC_ERROR_INT_ENABLE, 0)
5337 
5338 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5339 					      struct amdgpu_irq_src *source,
5340 					      unsigned type,
5341 					      enum amdgpu_interrupt_state state)
5342 {
5343 	switch (state) {
5344 	case AMDGPU_IRQ_STATE_DISABLE:
5345 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5346 				CP_ECC_ERROR_INT_ENABLE, 0);
5347 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5348 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5349 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5350 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5351 		break;
5352 
5353 	case AMDGPU_IRQ_STATE_ENABLE:
5354 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5355 				CP_ECC_ERROR_INT_ENABLE, 1);
5356 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5357 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5358 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5359 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5360 		break;
5361 	default:
5362 		break;
5363 	}
5364 
5365 	return 0;
5366 }
5367 
5368 
5369 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5370 					    struct amdgpu_irq_src *src,
5371 					    unsigned type,
5372 					    enum amdgpu_interrupt_state state)
5373 {
5374 	switch (type) {
5375 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5376 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5377 		break;
5378 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5379 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5380 		break;
5381 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5382 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5383 		break;
5384 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5385 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5386 		break;
5387 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5388 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5389 		break;
5390 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5391 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5392 		break;
5393 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5394 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5395 		break;
5396 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5397 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5398 		break;
5399 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5400 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5401 		break;
5402 	default:
5403 		break;
5404 	}
5405 	return 0;
5406 }
5407 
5408 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5409 			    struct amdgpu_irq_src *source,
5410 			    struct amdgpu_iv_entry *entry)
5411 {
5412 	int i;
5413 	u8 me_id, pipe_id, queue_id;
5414 	struct amdgpu_ring *ring;
5415 
5416 	DRM_DEBUG("IH: CP EOP\n");
5417 	me_id = (entry->ring_id & 0x0c) >> 2;
5418 	pipe_id = (entry->ring_id & 0x03) >> 0;
5419 	queue_id = (entry->ring_id & 0x70) >> 4;
5420 
5421 	switch (me_id) {
5422 	case 0:
5423 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5424 		break;
5425 	case 1:
5426 	case 2:
5427 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5428 			ring = &adev->gfx.compute_ring[i];
5429 			/* Per-queue interrupt is supported for MEC starting from VI.
5430 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5431 			  */
5432 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5433 				amdgpu_fence_process(ring);
5434 		}
5435 		break;
5436 	}
5437 	return 0;
5438 }
5439 
5440 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5441 			   struct amdgpu_iv_entry *entry)
5442 {
5443 	u8 me_id, pipe_id, queue_id;
5444 	struct amdgpu_ring *ring;
5445 	int i;
5446 
5447 	me_id = (entry->ring_id & 0x0c) >> 2;
5448 	pipe_id = (entry->ring_id & 0x03) >> 0;
5449 	queue_id = (entry->ring_id & 0x70) >> 4;
5450 
5451 	switch (me_id) {
5452 	case 0:
5453 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5454 		break;
5455 	case 1:
5456 	case 2:
5457 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5458 			ring = &adev->gfx.compute_ring[i];
5459 			if (ring->me == me_id && ring->pipe == pipe_id &&
5460 			    ring->queue == queue_id)
5461 				drm_sched_fault(&ring->sched);
5462 		}
5463 		break;
5464 	}
5465 }
5466 
5467 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5468 				 struct amdgpu_irq_src *source,
5469 				 struct amdgpu_iv_entry *entry)
5470 {
5471 	DRM_ERROR("Illegal register access in command stream\n");
5472 	gfx_v9_0_fault(adev, entry);
5473 	return 0;
5474 }
5475 
5476 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5477 				  struct amdgpu_irq_src *source,
5478 				  struct amdgpu_iv_entry *entry)
5479 {
5480 	DRM_ERROR("Illegal instruction in command stream\n");
5481 	gfx_v9_0_fault(adev, entry);
5482 	return 0;
5483 }
5484 
5485 
5486 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5487 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5488 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5489 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5490 	},
5491 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5492 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5493 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5494 	},
5495 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5496 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5497 	  0, 0
5498 	},
5499 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5500 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5501 	  0, 0
5502 	},
5503 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5504 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5505 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5506 	},
5507 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5508 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5509 	  0, 0
5510 	},
5511 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5512 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5513 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5514 	},
5515 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5516 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5517 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5518 	},
5519 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5520 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5521 	  0, 0
5522 	},
5523 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5524 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5525 	  0, 0
5526 	},
5527 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5528 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5529 	  0, 0
5530 	},
5531 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5532 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5533 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5534 	},
5535 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5536 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5537 	  0, 0
5538 	},
5539 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5540 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5541 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5542 	},
5543 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5544 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5545 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5546 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5547 	},
5548 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5549 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5550 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5551 	  0, 0
5552 	},
5553 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5554 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5555 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5556 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5557 	},
5558 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5559 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5560 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5561 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5562 	},
5563 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5564 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5565 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5566 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5567 	},
5568 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5569 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5570 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5571 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5572 	},
5573 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5574 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5575 	  0, 0
5576 	},
5577 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5578 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5579 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5580 	},
5581 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5582 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5583 	  0, 0
5584 	},
5585 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5586 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5587 	  0, 0
5588 	},
5589 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5590 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5591 	  0, 0
5592 	},
5593 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5594 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5595 	  0, 0
5596 	},
5597 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5598 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5599 	  0, 0
5600 	},
5601 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5602 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5603 	  0, 0
5604 	},
5605 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5606 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5607 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5608 	},
5609 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5610 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5611 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5612 	},
5613 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5614 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5615 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5616 	},
5617 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5618 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5619 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5620 	},
5621 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5622 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5623 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5624 	},
5625 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5626 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5627 	  0, 0
5628 	},
5629 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5630 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5631 	  0, 0
5632 	},
5633 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5634 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5635 	  0, 0
5636 	},
5637 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5638 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5639 	  0, 0
5640 	},
5641 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5642 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5643 	  0, 0
5644 	},
5645 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5646 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5647 	  0, 0
5648 	},
5649 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5650 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5651 	  0, 0
5652 	},
5653 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5654 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5655 	  0, 0
5656 	},
5657 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5658 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5659 	  0, 0
5660 	},
5661 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5662 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5663 	  0, 0
5664 	},
5665 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5666 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5667 	  0, 0
5668 	},
5669 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5670 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5671 	  0, 0
5672 	},
5673 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5674 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5675 	  0, 0
5676 	},
5677 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5678 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5679 	  0, 0
5680 	},
5681 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5682 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5683 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5684 	},
5685 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5686 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5687 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5688 	},
5689 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5690 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5691 	  0, 0
5692 	},
5693 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5694 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5695 	  0, 0
5696 	},
5697 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5698 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5699 	  0, 0
5700 	},
5701 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5702 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5703 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5704 	},
5705 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5706 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5707 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5708 	},
5709 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5710 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5711 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5712 	},
5713 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5714 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5715 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5716 	},
5717 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5718 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5719 	  0, 0
5720 	},
5721 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5722 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5723 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5724 	},
5725 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5726 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5727 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5728 	},
5729 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5730 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5731 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5732 	},
5733 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5734 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5735 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5736 	},
5737 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5738 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5739 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5740 	},
5741 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5742 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5743 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5744 	},
5745 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5746 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5747 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5748 	},
5749 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5750 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5751 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5752 	},
5753 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5754 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5755 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5756 	},
5757 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5758 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5759 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5760 	},
5761 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5762 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5763 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5764 	},
5765 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5766 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5767 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5768 	},
5769 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5770 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5771 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5772 	},
5773 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5774 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5775 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5776 	},
5777 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5778 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5779 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5780 	},
5781 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5782 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5783 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5784 	},
5785 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5786 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5787 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5788 	},
5789 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5790 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5791 	  0, 0
5792 	},
5793 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5794 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5795 	  0, 0
5796 	},
5797 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5798 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5799 	  0, 0
5800 	},
5801 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5802 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5803 	  0, 0
5804 	},
5805 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5806 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5807 	  0, 0
5808 	},
5809 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5810 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5811 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5812 	},
5813 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5814 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5815 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5816 	},
5817 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5818 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5819 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5820 	},
5821 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5822 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5823 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5824 	},
5825 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5826 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5827 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5828 	},
5829 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5830 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5831 	  0, 0
5832 	},
5833 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5834 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5835 	  0, 0
5836 	},
5837 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5838 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5839 	  0, 0
5840 	},
5841 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5842 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5843 	  0, 0
5844 	},
5845 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5846 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5847 	  0, 0
5848 	},
5849 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5850 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5851 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5852 	},
5853 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5854 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5855 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5856 	},
5857 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5858 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5859 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5860 	},
5861 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5862 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5863 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5864 	},
5865 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5866 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5867 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5868 	},
5869 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5870 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5871 	  0, 0
5872 	},
5873 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5874 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5875 	  0, 0
5876 	},
5877 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5878 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5879 	  0, 0
5880 	},
5881 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5882 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5883 	  0, 0
5884 	},
5885 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5886 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5887 	  0, 0
5888 	},
5889 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5890 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5891 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5892 	},
5893 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5894 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5895 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5896 	},
5897 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5898 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5899 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5900 	},
5901 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5902 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5903 	  0, 0
5904 	},
5905 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5906 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5907 	  0, 0
5908 	},
5909 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5910 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5911 	  0, 0
5912 	},
5913 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5914 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5915 	  0, 0
5916 	},
5917 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5918 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5919 	  0, 0
5920 	},
5921 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5922 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5923 	  0, 0
5924 	}
5925 };
5926 
5927 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5928 				     void *inject_if)
5929 {
5930 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5931 	int ret;
5932 	struct ta_ras_trigger_error_input block_info = { 0 };
5933 
5934 	if (adev->asic_type != CHIP_VEGA20)
5935 		return -EINVAL;
5936 
5937 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5938 		return -EINVAL;
5939 
5940 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5941 		return -EPERM;
5942 
5943 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5944 	      info->head.type)) {
5945 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
5946 			ras_gfx_subblocks[info->head.sub_block_index].name,
5947 			info->head.type);
5948 		return -EPERM;
5949 	}
5950 
5951 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5952 	      info->head.type)) {
5953 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
5954 			ras_gfx_subblocks[info->head.sub_block_index].name,
5955 			info->head.type);
5956 		return -EPERM;
5957 	}
5958 
5959 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5960 	block_info.sub_block_index =
5961 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5962 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5963 	block_info.address = info->address;
5964 	block_info.value = info->value;
5965 
5966 	mutex_lock(&adev->grbm_idx_mutex);
5967 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
5968 	mutex_unlock(&adev->grbm_idx_mutex);
5969 
5970 	return ret;
5971 }
5972 
5973 static const char *vml2_mems[] = {
5974 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
5975 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
5976 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
5977 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
5978 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
5979 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
5980 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
5981 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
5982 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
5983 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
5984 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
5985 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
5986 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
5987 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
5988 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
5989 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
5990 };
5991 
5992 static const char *vml2_walker_mems[] = {
5993 	"UTC_VML2_CACHE_PDE0_MEM0",
5994 	"UTC_VML2_CACHE_PDE0_MEM1",
5995 	"UTC_VML2_CACHE_PDE1_MEM0",
5996 	"UTC_VML2_CACHE_PDE1_MEM1",
5997 	"UTC_VML2_CACHE_PDE2_MEM0",
5998 	"UTC_VML2_CACHE_PDE2_MEM1",
5999 	"UTC_VML2_RDIF_LOG_FIFO",
6000 };
6001 
6002 static const char *atc_l2_cache_2m_mems[] = {
6003 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6004 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6005 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6006 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6007 };
6008 
6009 static const char *atc_l2_cache_4k_mems[] = {
6010 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6011 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6012 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6013 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6014 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6015 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6016 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6017 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6018 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6019 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6020 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6021 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6022 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6023 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6024 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6025 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6026 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6027 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6028 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6029 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6030 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6031 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6032 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6033 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6034 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6035 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6036 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6037 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6038 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6039 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6040 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6041 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6042 };
6043 
6044 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6045 					 struct ras_err_data *err_data)
6046 {
6047 	uint32_t i, data;
6048 	uint32_t sec_count, ded_count;
6049 
6050 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6051 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6052 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6053 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6054 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6055 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6056 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6057 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6058 
6059 	for (i = 0; i < 16; i++) {
6060 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6061 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6062 
6063 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6064 		if (sec_count) {
6065 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6066 				 vml2_mems[i], sec_count);
6067 			err_data->ce_count += sec_count;
6068 		}
6069 
6070 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6071 		if (ded_count) {
6072 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6073 				 vml2_mems[i], ded_count);
6074 			err_data->ue_count += ded_count;
6075 		}
6076 	}
6077 
6078 	for (i = 0; i < 7; i++) {
6079 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6080 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6081 
6082 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6083 						SEC_COUNT);
6084 		if (sec_count) {
6085 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6086 				 vml2_walker_mems[i], sec_count);
6087 			err_data->ce_count += sec_count;
6088 		}
6089 
6090 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6091 						DED_COUNT);
6092 		if (ded_count) {
6093 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6094 				 vml2_walker_mems[i], ded_count);
6095 			err_data->ue_count += ded_count;
6096 		}
6097 	}
6098 
6099 	for (i = 0; i < 4; i++) {
6100 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6101 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6102 
6103 		sec_count = (data & 0x00006000L) >> 0xd;
6104 		if (sec_count) {
6105 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6106 				 atc_l2_cache_2m_mems[i], sec_count);
6107 			err_data->ce_count += sec_count;
6108 		}
6109 	}
6110 
6111 	for (i = 0; i < 32; i++) {
6112 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6113 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6114 
6115 		sec_count = (data & 0x00006000L) >> 0xd;
6116 		if (sec_count) {
6117 			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6118 				 atc_l2_cache_4k_mems[i], sec_count);
6119 			err_data->ce_count += sec_count;
6120 		}
6121 
6122 		ded_count = (data & 0x00018000L) >> 0xf;
6123 		if (ded_count) {
6124 			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6125 				 atc_l2_cache_4k_mems[i], ded_count);
6126 			err_data->ue_count += ded_count;
6127 		}
6128 	}
6129 
6130 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6131 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6132 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6133 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6134 
6135 	return 0;
6136 }
6137 
6138 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6139 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6140 	uint32_t *sec_count, uint32_t *ded_count)
6141 {
6142 	uint32_t i;
6143 	uint32_t sec_cnt, ded_cnt;
6144 
6145 	for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
6146 		if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6147 			gc_ras_fields_vg20[i].seg != reg->seg ||
6148 			gc_ras_fields_vg20[i].inst != reg->inst)
6149 			continue;
6150 
6151 		sec_cnt = (value &
6152 				gc_ras_fields_vg20[i].sec_count_mask) >>
6153 				gc_ras_fields_vg20[i].sec_count_shift;
6154 		if (sec_cnt) {
6155 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6156 				gc_ras_fields_vg20[i].name,
6157 				se_id, inst_id,
6158 				sec_cnt);
6159 			*sec_count += sec_cnt;
6160 		}
6161 
6162 		ded_cnt = (value &
6163 				gc_ras_fields_vg20[i].ded_count_mask) >>
6164 				gc_ras_fields_vg20[i].ded_count_shift;
6165 		if (ded_cnt) {
6166 			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6167 				gc_ras_fields_vg20[i].name,
6168 				se_id, inst_id,
6169 				ded_cnt);
6170 			*ded_count += ded_cnt;
6171 		}
6172 	}
6173 
6174 	return 0;
6175 }
6176 
6177 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6178 					  void *ras_error_status)
6179 {
6180 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6181 	uint32_t sec_count = 0, ded_count = 0;
6182 	uint32_t i, j, k;
6183 	uint32_t reg_value;
6184 
6185 	if (adev->asic_type != CHIP_VEGA20)
6186 		return -EINVAL;
6187 
6188 	err_data->ue_count = 0;
6189 	err_data->ce_count = 0;
6190 
6191 	mutex_lock(&adev->grbm_idx_mutex);
6192 
6193 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6194 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6195 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6196 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6197 				reg_value =
6198 					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6199 				if (reg_value)
6200 					__get_ras_error_count(&sec_ded_counter_registers[i],
6201 							j, k, reg_value,
6202 							&sec_count, &ded_count);
6203 			}
6204 		}
6205 	}
6206 
6207 	err_data->ce_count += sec_count;
6208 	err_data->ue_count += ded_count;
6209 
6210 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6211 	mutex_unlock(&adev->grbm_idx_mutex);
6212 
6213 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6214 
6215 	return 0;
6216 }
6217 
6218 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6219 	.name = "gfx_v9_0",
6220 	.early_init = gfx_v9_0_early_init,
6221 	.late_init = gfx_v9_0_late_init,
6222 	.sw_init = gfx_v9_0_sw_init,
6223 	.sw_fini = gfx_v9_0_sw_fini,
6224 	.hw_init = gfx_v9_0_hw_init,
6225 	.hw_fini = gfx_v9_0_hw_fini,
6226 	.suspend = gfx_v9_0_suspend,
6227 	.resume = gfx_v9_0_resume,
6228 	.is_idle = gfx_v9_0_is_idle,
6229 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6230 	.soft_reset = gfx_v9_0_soft_reset,
6231 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6232 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6233 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6234 };
6235 
6236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6237 	.type = AMDGPU_RING_TYPE_GFX,
6238 	.align_mask = 0xff,
6239 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6240 	.support_64bit_ptrs = true,
6241 	.vmhub = AMDGPU_GFXHUB_0,
6242 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6243 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6244 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6245 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6246 		5 +  /* COND_EXEC */
6247 		7 +  /* PIPELINE_SYNC */
6248 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6249 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6250 		2 + /* VM_FLUSH */
6251 		8 +  /* FENCE for VM_FLUSH */
6252 		20 + /* GDS switch */
6253 		4 + /* double SWITCH_BUFFER,
6254 		       the first COND_EXEC jump to the place just
6255 			   prior to this double SWITCH_BUFFER  */
6256 		5 + /* COND_EXEC */
6257 		7 +	 /*	HDP_flush */
6258 		4 +	 /*	VGT_flush */
6259 		14 + /*	CE_META */
6260 		31 + /*	DE_META */
6261 		3 + /* CNTX_CTRL */
6262 		5 + /* HDP_INVL */
6263 		8 + 8 + /* FENCE x2 */
6264 		2, /* SWITCH_BUFFER */
6265 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6266 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6267 	.emit_fence = gfx_v9_0_ring_emit_fence,
6268 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6269 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6270 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6271 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6272 	.test_ring = gfx_v9_0_ring_test_ring,
6273 	.test_ib = gfx_v9_0_ring_test_ib,
6274 	.insert_nop = amdgpu_ring_insert_nop,
6275 	.pad_ib = amdgpu_ring_generic_pad_ib,
6276 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6277 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6278 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6279 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6280 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6281 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6282 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6283 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6284 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6285 };
6286 
6287 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6288 	.type = AMDGPU_RING_TYPE_COMPUTE,
6289 	.align_mask = 0xff,
6290 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6291 	.support_64bit_ptrs = true,
6292 	.vmhub = AMDGPU_GFXHUB_0,
6293 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6294 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6295 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6296 	.emit_frame_size =
6297 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6298 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6299 		5 + /* hdp invalidate */
6300 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6301 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6302 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6303 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6304 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6305 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6306 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6307 	.emit_fence = gfx_v9_0_ring_emit_fence,
6308 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6309 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6310 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6311 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6312 	.test_ring = gfx_v9_0_ring_test_ring,
6313 	.test_ib = gfx_v9_0_ring_test_ib,
6314 	.insert_nop = amdgpu_ring_insert_nop,
6315 	.pad_ib = amdgpu_ring_generic_pad_ib,
6316 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6317 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6318 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6319 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6320 };
6321 
6322 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6323 	.type = AMDGPU_RING_TYPE_KIQ,
6324 	.align_mask = 0xff,
6325 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6326 	.support_64bit_ptrs = true,
6327 	.vmhub = AMDGPU_GFXHUB_0,
6328 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6329 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6330 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6331 	.emit_frame_size =
6332 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6333 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6334 		5 + /* hdp invalidate */
6335 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6336 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6337 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6338 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6339 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6340 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6341 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6342 	.test_ring = gfx_v9_0_ring_test_ring,
6343 	.insert_nop = amdgpu_ring_insert_nop,
6344 	.pad_ib = amdgpu_ring_generic_pad_ib,
6345 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6346 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6347 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6348 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6349 };
6350 
6351 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6352 {
6353 	int i;
6354 
6355 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6356 
6357 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6358 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6359 
6360 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6361 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6362 }
6363 
6364 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6365 	.set = gfx_v9_0_set_eop_interrupt_state,
6366 	.process = gfx_v9_0_eop_irq,
6367 };
6368 
6369 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6370 	.set = gfx_v9_0_set_priv_reg_fault_state,
6371 	.process = gfx_v9_0_priv_reg_irq,
6372 };
6373 
6374 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6375 	.set = gfx_v9_0_set_priv_inst_fault_state,
6376 	.process = gfx_v9_0_priv_inst_irq,
6377 };
6378 
6379 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6380 	.set = gfx_v9_0_set_cp_ecc_error_state,
6381 	.process = amdgpu_gfx_cp_ecc_error_irq,
6382 };
6383 
6384 
6385 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6386 {
6387 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6388 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6389 
6390 	adev->gfx.priv_reg_irq.num_types = 1;
6391 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6392 
6393 	adev->gfx.priv_inst_irq.num_types = 1;
6394 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6395 
6396 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6397 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6398 }
6399 
6400 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6401 {
6402 	switch (adev->asic_type) {
6403 	case CHIP_VEGA10:
6404 	case CHIP_VEGA12:
6405 	case CHIP_VEGA20:
6406 	case CHIP_RAVEN:
6407 	case CHIP_ARCTURUS:
6408 	case CHIP_RENOIR:
6409 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6410 		break;
6411 	default:
6412 		break;
6413 	}
6414 }
6415 
6416 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6417 {
6418 	/* init asci gds info */
6419 	switch (adev->asic_type) {
6420 	case CHIP_VEGA10:
6421 	case CHIP_VEGA12:
6422 	case CHIP_VEGA20:
6423 		adev->gds.gds_size = 0x10000;
6424 		break;
6425 	case CHIP_RAVEN:
6426 	case CHIP_ARCTURUS:
6427 		adev->gds.gds_size = 0x1000;
6428 		break;
6429 	default:
6430 		adev->gds.gds_size = 0x10000;
6431 		break;
6432 	}
6433 
6434 	switch (adev->asic_type) {
6435 	case CHIP_VEGA10:
6436 	case CHIP_VEGA20:
6437 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6438 		break;
6439 	case CHIP_VEGA12:
6440 		adev->gds.gds_compute_max_wave_id = 0x27f;
6441 		break;
6442 	case CHIP_RAVEN:
6443 		if (adev->rev_id >= 0x8)
6444 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6445 		else
6446 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6447 		break;
6448 	case CHIP_ARCTURUS:
6449 		adev->gds.gds_compute_max_wave_id = 0xfff;
6450 		break;
6451 	default:
6452 		/* this really depends on the chip */
6453 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6454 		break;
6455 	}
6456 
6457 	adev->gds.gws_size = 64;
6458 	adev->gds.oa_size = 16;
6459 }
6460 
6461 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6462 						 u32 bitmap)
6463 {
6464 	u32 data;
6465 
6466 	if (!bitmap)
6467 		return;
6468 
6469 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6470 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6471 
6472 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6473 }
6474 
6475 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6476 {
6477 	u32 data, mask;
6478 
6479 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6480 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6481 
6482 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6483 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6484 
6485 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6486 
6487 	return (~data) & mask;
6488 }
6489 
6490 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6491 				 struct amdgpu_cu_info *cu_info)
6492 {
6493 	int i, j, k, counter, active_cu_number = 0;
6494 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6495 	unsigned disable_masks[4 * 4];
6496 
6497 	if (!adev || !cu_info)
6498 		return -EINVAL;
6499 
6500 	/*
6501 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6502 	 */
6503 	if (adev->gfx.config.max_shader_engines *
6504 		adev->gfx.config.max_sh_per_se > 16)
6505 		return -EINVAL;
6506 
6507 	amdgpu_gfx_parse_disable_cu(disable_masks,
6508 				    adev->gfx.config.max_shader_engines,
6509 				    adev->gfx.config.max_sh_per_se);
6510 
6511 	mutex_lock(&adev->grbm_idx_mutex);
6512 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6513 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6514 			mask = 1;
6515 			ao_bitmap = 0;
6516 			counter = 0;
6517 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6518 			gfx_v9_0_set_user_cu_inactive_bitmap(
6519 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6520 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6521 
6522 			/*
6523 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6524 			 * 4x4 size array, and it's usually suitable for Vega
6525 			 * ASICs which has 4*2 SE/SH layout.
6526 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6527 			 * To mostly reduce the impact, we make it compatible
6528 			 * with current bitmap array as below:
6529 			 *    SE4,SH0 --> bitmap[0][1]
6530 			 *    SE5,SH0 --> bitmap[1][1]
6531 			 *    SE6,SH0 --> bitmap[2][1]
6532 			 *    SE7,SH0 --> bitmap[3][1]
6533 			 */
6534 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6535 
6536 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6537 				if (bitmap & mask) {
6538 					if (counter < adev->gfx.config.max_cu_per_sh)
6539 						ao_bitmap |= mask;
6540 					counter ++;
6541 				}
6542 				mask <<= 1;
6543 			}
6544 			active_cu_number += counter;
6545 			if (i < 2 && j < 2)
6546 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6547 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6548 		}
6549 	}
6550 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6551 	mutex_unlock(&adev->grbm_idx_mutex);
6552 
6553 	cu_info->number = active_cu_number;
6554 	cu_info->ao_cu_mask = ao_cu_mask;
6555 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6556 
6557 	return 0;
6558 }
6559 
6560 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6561 {
6562 	.type = AMD_IP_BLOCK_TYPE_GFX,
6563 	.major = 9,
6564 	.minor = 0,
6565 	.rev = 0,
6566 	.funcs = &gfx_v9_0_ip_funcs,
6567 };
6568