xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 8ef9ea1503d0a129cc6f5cf48fb63633efa5d766)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54 
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58 
/*
 * File-local constants.
 * NOTE(review): none of the users of the first five macros are inside the
 * visible part of this file, so the meanings suggested by the names (ring
 * counts, a MEC HPD size, RLC ucode load / save-restore addresses) are
 * assumptions to confirm at the use sites.
 */
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
/*
 * Offset / base-index pair for the GCEA_PROBE_MAP register, defined locally.
 * It is referenced by the golden_settings_gc_9_1_rn table in this file.
 */
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
/*
 * Firmware images declared through MODULE_FIRMWARE(), grouped by ASIC name.
 * The image kinds that appear are ce, pfp, me, mec, mec2 and rlc.
 *
 * Not every ASIC lists every kind:
 *   - picasso adds an extra rlc_am4 image and raven2 is followed by a
 *     raven_kicker rlc image;
 *   - arcturus lists only mec and rlc;
 *   - renoir has no mec2 entry;
 *   - aldebaran lists mec, mec2 and rlc plus sjt variants of mec and mec2.
 *
 * NOTE(review): the code that actually requests these files is outside the
 * visible range; keep this list in sync with it.
 */
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133 
/*
 * Locally defined register offsets, each paired with a _BASE_IDX macro.
 * The TCP_CHAN_STEER_0..5 set carries an _ARCT suffix and the
 * GOLDEN_TSC_COUNT pair carries a _Renoir suffix.
 * NOTE(review): presumably these are ASIC-specific offsets (Arcturus and
 * Renoir) that differ from, or are missing in, the shared gc_9_0 headers;
 * their users are outside the visible range - confirm.
 */
134 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
144 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
146 
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151 
/*
 * GFX RAS sub-block identifiers (TA_RAS_BLOCK__*).
 *
 * Values are assigned implicitly and run contiguously from 0
 * (TA_RAS_BLOCK__GFX_CPC_INDEX_START) up to TA_RAS_BLOCK__GFX_MAX, so
 * TA_RAS_BLOCK__GFX_MAX equals the number of distinct sub-block ids.
 *
 * Most hardware groups are bracketed by <GROUP>_INDEX_START and
 * <GROUP>_INDEX_END. INDEX_START takes the next free value and the first
 * real member is defined equal to it; INDEX_END is defined equal to the
 * last real member. The markers therefore never add a value of their own.
 * SQC, TCC and EA are further split into numbered sub-ranges
 * (INDEXn_START / INDEXn_END). SPI_SR_MEM, TCI_WRITE_RAM and the four UTC
 * entries are single ids without markers.
 *
 * AMDGPU_RAS_SUB_BLOCK() stores TA_RAS_BLOCK__<name> into
 * ras_gfx_subblock.ta_subblock, so inserting or reordering entries changes
 * the ids recorded in that table.
 * NOTE(review): the TA_ prefix suggests these ids are shared with the RAS
 * trusted application; confirm before changing any value.
 */
152 enum ta_ras_gfx_subblock {
153 	/*CPC*/
154 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
155 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPC_UCODE,
157 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
158 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
159 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
160 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
161 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
162 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
163 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
164 	/* CPF*/
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
166 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
167 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
168 	TA_RAS_BLOCK__GFX_CPF_TAG,
169 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
170 	/* CPG*/
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
172 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
173 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
174 	TA_RAS_BLOCK__GFX_CPG_TAG,
175 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
176 	/* GDS*/
177 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
178 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
179 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
180 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
181 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
182 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
183 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
184 	/* SPI*/
185 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
186 	/* SQ*/
187 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
188 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
189 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
190 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
191 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
192 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
193 	/* SQC (3 ranges)*/
194 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
195 	/* SQC range 0*/
196 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
197 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
198 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
203 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
204 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
205 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
206 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
207 	/* SQC range 1*/
208 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
210 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
211 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
216 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
217 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
218 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
219 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
220 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
221 	/* SQC range 2*/
222 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
224 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
225 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
230 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
231 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
232 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
233 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
234 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
235 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
236 	/* TA*/
237 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
238 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
239 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
240 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
241 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
242 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
243 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
244 	/* TCA*/
245 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
246 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
247 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
248 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
249 	/* TCC (5 sub-ranges)*/
250 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
251 	/* TCC range 0*/
252 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
255 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
256 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
257 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
258 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
259 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
260 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
261 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
262 	/* TCC range 1*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
264 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
265 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
267 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
268 	/* TCC range 2*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
270 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
271 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
272 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
273 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
274 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
275 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
276 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
277 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
278 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
279 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
280 	/* TCC range 3*/
281 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
282 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
283 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
284 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
285 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
286 	/* TCC range 4*/
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
288 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
289 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
290 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
291 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
292 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
293 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
294 	/* TCI*/
295 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
296 	/* TCP*/
297 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
298 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
299 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
300 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
301 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
302 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
303 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
304 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
305 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
306 	/* TD*/
307 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
308 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
309 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
310 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
311 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
312 	/* EA (3 sub-ranges)*/
313 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
314 	/* EA range 0*/
315 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
316 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
317 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
319 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
320 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
321 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
322 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
323 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
325 	/* EA range 1*/
326 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
327 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
328 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
330 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
331 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
332 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
333 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
334 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
335 	/* EA range 2*/
336 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
337 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
338 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
339 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
340 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
341 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
342 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
343 	/* UTC VM L2 bank*/
344 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
345 	/* UTC VM walker*/
346 	TA_RAS_BLOCK__UTC_VML2_WALKER,
347 	/* UTC ATC L2 2MB cache*/
348 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
349 	/* UTC ATC L2 4KB cache*/
350 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
351 	TA_RAS_BLOCK__GFX_MAX
352 };
353 
/*
 * One entry of the ras_gfx_subblocks[] table.
 *
 * AMDGPU_RAS_SUB_BLOCK() fills the fields positionally, so the member order
 * here must stay in step with that macro.
 *
 * name:                    sub-block name, the stringified macro argument
 * ta_subblock:             matching TA_RAS_BLOCK__* id
 * hw_supported_error_type: bit mask packed from macro arguments a..d
 *                          (a = bit 0, b = bit 1, c = bit 2, d = bit 3)
 * sw_supported_error_type: bit mask packed from macro arguments e..h
 *                          (g = bit 0, e = bit 1, h = bit 2, f = bit 3)
 *
 * NOTE(review): name is a non-const unsigned char pointer, yet the only
 * initialiser visible in this file is a string literal. A const char pointer
 * looks like the natural type; check the users outside this range before
 * changing it.
 */
354 struct ras_gfx_subblock {
355 	unsigned char *name;
356 	int ta_subblock;
357 	int hw_supported_error_type;
358 	int sw_supported_error_type;
359 };
360 
/*
 * Build one designated-index entry of a struct ras_gfx_subblock array.
 *
 * subblock: bare sub-block name. It is pasted onto AMDGPU_RAS_BLOCK__ to
 *           form the array index, onto TA_RAS_BLOCK__ to form the
 *           ta_subblock value, and stringified to form the name.
 * a..d:     0/1 flags packed into hw_supported_error_type as
 *           a = bit 0, b = bit 1, c = bit 2, d = bit 3.
 * e..h:     0/1 flags packed into sw_supported_error_type. The order is
 *           NOT sequential: g = bit 0, e = bit 1, h = bit 2, f = bit 3.
 *
 * The initialiser is positional, so it depends on the member order of
 * struct ras_gfx_subblock.
 * NOTE(review): what each bit position means is not visible here;
 * presumably the positions correspond to RAS error-type flags declared in
 * amdgpu_ras.h - confirm.
 */
361 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
362 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
363 		#subblock,                                                     \
364 		TA_RAS_BLOCK__##subblock,                                      \
365 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
366 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
367 	}
368 
/*
 * Per-sub-block RAS description table for GFX.
 *
 * Entries are placed with designated initialisers, so the array is indexed
 * by AMDGPU_RAS_BLOCK__<subblock> and the textual order of the rows below
 * does not determine their position.
 *
 * Each row is AMDGPU_RAS_SUB_BLOCK(name, a, b, c, d, e, f, g, h): the first
 * four flags form hw_supported_error_type and the last four form
 * sw_supported_error_type (see the macro for the exact bit positions).
 *
 * NOTE(review): the flag values are data whose origin is not visible in this
 * file; treat them as authoritative and do not normalise rows that look
 * inconsistent with their neighbours without checking their source.
 */
369 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
370 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
389 			     0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     1),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
430 			     0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
438 			     0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
442 			     0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
456 			     1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
458 			     1),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
462 			     0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
475 			     0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
480 			     0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
482 			     0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
513 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
517 };
518 
/*
 * Golden register table golden_settings_gc_9_0.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): SOC15_REG_GOLDEN_VALUE is defined outside this file;
 * presumably the two constants are an AND mask and the value programmed
 * under that mask - confirm in soc15.h. The code that selects and applies
 * this table is outside the visible range; the name suggests it is the base
 * table for GC 9.0 parts - confirm at the call site.
 */
519 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
520 {
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
541 };
542 
/*
 * Golden register table golden_settings_gc_9_0_vg10.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): presumably an AND mask and the value programmed under it, as
 * defined by SOC15_REG_GOLDEN_VALUE in soc15.h - confirm. The vg10 suffix
 * suggests Vega10-specific settings; the selecting code is outside the
 * visible range - confirm at the call site.
 */
543 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
544 {
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
563 };
564 
/*
 * Golden register table golden_settings_gc_9_0_vg20.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): presumably an AND mask and the value programmed under it, as
 * defined by SOC15_REG_GOLDEN_VALUE in soc15.h - confirm. The vg20 suffix
 * suggests Vega20-specific settings; the selecting code is outside the
 * visible range - confirm at the call site.
 */
565 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
566 {
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
578 };
579 
/*
 * Golden register table golden_settings_gc_9_1.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): presumably an AND mask and the value programmed under it, as
 * defined by SOC15_REG_GOLDEN_VALUE in soc15.h - confirm. The name suggests
 * the base table for GC 9.1 parts; the selecting code is outside the visible
 * range - confirm at the call site.
 */
580 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
581 {
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
606 };
607 
/*
 * Golden register table golden_settings_gc_9_1_rv1.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): presumably an AND mask and the value programmed under it, as
 * defined by SOC15_REG_GOLDEN_VALUE in soc15.h - confirm. The rv1 suffix
 * suggests the first Raven variant; the selecting code is outside the
 * visible range - confirm at the call site.
 */
608 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
609 {
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
617 };
618 
/*
 * Golden register table golden_settings_gc_9_1_rv2.
 *
 * Each row names a GC register (instance 0) followed by two 32-bit
 * constants.
 * NOTE(review): presumably an AND mask and the value programmed under it, as
 * defined by SOC15_REG_GOLDEN_VALUE in soc15.h - confirm. The rv2 suffix
 * suggests the second Raven variant; the selecting code is outside the
 * visible range - confirm at the call site.
 */
619 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
620 {
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
656 };
657 
658 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
659 {
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
663 };
664 
665 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
666 {
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
683 };
684 
685 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
686 {
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
700 };
701 
702 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
703 {
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
715 };
716 
717 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
718 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
719 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
720 };
721 
722 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
723 {
724 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
729 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
730 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
731 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
732 };
733 
734 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
735 {
736 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
741 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
742 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
743 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
744 };
745 
/*
 * Per-ASIC golden GB_ADDR_CONFIG values.  The Vega12, Raven and Raven2
 * constants match the mmGB_ADDR_CONFIG values in the vg12, rv1 and rv2
 * golden tables above.
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN	0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN	0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN	0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN	0x26013041
750 
751 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
752 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
753 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
754 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
755 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
756 				struct amdgpu_cu_info *cu_info);
757 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
758 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
759 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
760 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
761 					  void *ras_error_status);
762 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
763 				     void *inject_if, uint32_t instance_mask);
764 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
765 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
766 					      unsigned int vmid);
767 
768 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
769 				uint64_t queue_mask)
770 {
771 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
772 	amdgpu_ring_write(kiq_ring,
773 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
774 		/* vmid_mask:0* queue_type:0 (KIQ) */
775 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
776 	amdgpu_ring_write(kiq_ring,
777 			lower_32_bits(queue_mask));	/* queue mask lo */
778 	amdgpu_ring_write(kiq_ring,
779 			upper_32_bits(queue_mask));	/* queue mask hi */
780 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
781 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
782 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
783 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
784 }
785 
786 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
787 				 struct amdgpu_ring *ring)
788 {
789 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
790 	uint64_t wptr_addr = ring->wptr_gpu_addr;
791 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
792 
793 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
794 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
795 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
796 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
797 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
798 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
799 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
800 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
801 			 /*queue_type: normal compute queue */
802 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
803 			 /* alloc format: all_on_one_pipe */
804 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
805 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
806 			 /* num_queues: must be 1 */
807 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
808 	amdgpu_ring_write(kiq_ring,
809 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
810 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
811 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
812 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
813 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
814 }
815 
816 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
817 				   struct amdgpu_ring *ring,
818 				   enum amdgpu_unmap_queues_action action,
819 				   u64 gpu_addr, u64 seq)
820 {
821 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
822 
823 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
824 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
825 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
826 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
827 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
828 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
829 	amdgpu_ring_write(kiq_ring,
830 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
831 
832 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
833 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
834 		amdgpu_ring_write(kiq_ring, 0);
835 		amdgpu_ring_write(kiq_ring, 0);
836 
837 	} else {
838 		amdgpu_ring_write(kiq_ring, 0);
839 		amdgpu_ring_write(kiq_ring, 0);
840 		amdgpu_ring_write(kiq_ring, 0);
841 	}
842 }
843 
844 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
845 				   struct amdgpu_ring *ring,
846 				   u64 addr,
847 				   u64 seq)
848 {
849 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
850 
851 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
852 	amdgpu_ring_write(kiq_ring,
853 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
854 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
855 			  PACKET3_QUERY_STATUS_COMMAND(2));
856 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
857 	amdgpu_ring_write(kiq_ring,
858 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
859 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
860 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
861 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
862 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
863 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
864 }
865 
866 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
867 				uint16_t pasid, uint32_t flush_type,
868 				bool all_hub)
869 {
870 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
871 	amdgpu_ring_write(kiq_ring,
872 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
873 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
874 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
875 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
876 }
877 
878 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
879 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
880 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
881 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
882 	.kiq_query_status = gfx_v9_0_kiq_query_status,
883 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
884 	.set_resources_size = 8,
885 	.map_queues_size = 7,
886 	.unmap_queues_size = 6,
887 	.query_status_size = 7,
888 	.invalidate_tlbs_size = 2,
889 };
890 
891 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
892 {
893 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
894 }
895 
896 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
897 {
898 	switch (adev->ip_versions[GC_HWIP][0]) {
899 	case IP_VERSION(9, 0, 1):
900 		soc15_program_register_sequence(adev,
901 						golden_settings_gc_9_0,
902 						ARRAY_SIZE(golden_settings_gc_9_0));
903 		soc15_program_register_sequence(adev,
904 						golden_settings_gc_9_0_vg10,
905 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
906 		break;
907 	case IP_VERSION(9, 2, 1):
908 		soc15_program_register_sequence(adev,
909 						golden_settings_gc_9_2_1,
910 						ARRAY_SIZE(golden_settings_gc_9_2_1));
911 		soc15_program_register_sequence(adev,
912 						golden_settings_gc_9_2_1_vg12,
913 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
914 		break;
915 	case IP_VERSION(9, 4, 0):
916 		soc15_program_register_sequence(adev,
917 						golden_settings_gc_9_0,
918 						ARRAY_SIZE(golden_settings_gc_9_0));
919 		soc15_program_register_sequence(adev,
920 						golden_settings_gc_9_0_vg20,
921 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
922 		break;
923 	case IP_VERSION(9, 4, 1):
924 		soc15_program_register_sequence(adev,
925 						golden_settings_gc_9_4_1_arct,
926 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
927 		break;
928 	case IP_VERSION(9, 2, 2):
929 	case IP_VERSION(9, 1, 0):
930 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
931 						ARRAY_SIZE(golden_settings_gc_9_1));
932 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
933 			soc15_program_register_sequence(adev,
934 							golden_settings_gc_9_1_rv2,
935 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
936 		else
937 			soc15_program_register_sequence(adev,
938 							golden_settings_gc_9_1_rv1,
939 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
940 		break;
941 	 case IP_VERSION(9, 3, 0):
942 		soc15_program_register_sequence(adev,
943 						golden_settings_gc_9_1_rn,
944 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
945 		return; /* for renoir, don't need common goldensetting */
946 	case IP_VERSION(9, 4, 2):
947 		gfx_v9_4_2_init_golden_registers(adev,
948 						 adev->smuio.funcs->get_die_id(adev));
949 		break;
950 	default:
951 		break;
952 	}
953 
954 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
955 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
956 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
957 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
958 }
959 
960 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
961 				       bool wc, uint32_t reg, uint32_t val)
962 {
963 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
964 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
965 				WRITE_DATA_DST_SEL(0) |
966 				(wc ? WR_CONFIRM : 0));
967 	amdgpu_ring_write(ring, reg);
968 	amdgpu_ring_write(ring, 0);
969 	amdgpu_ring_write(ring, val);
970 }
971 
972 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
973 				  int mem_space, int opt, uint32_t addr0,
974 				  uint32_t addr1, uint32_t ref, uint32_t mask,
975 				  uint32_t inv)
976 {
977 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
978 	amdgpu_ring_write(ring,
979 				 /* memory (1) or register (0) */
980 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
981 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
982 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
983 				 WAIT_REG_MEM_ENGINE(eng_sel)));
984 
985 	if (mem_space)
986 		BUG_ON(addr0 & 0x3); /* Dword align */
987 	amdgpu_ring_write(ring, addr0);
988 	amdgpu_ring_write(ring, addr1);
989 	amdgpu_ring_write(ring, ref);
990 	amdgpu_ring_write(ring, mask);
991 	amdgpu_ring_write(ring, inv); /* poll interval */
992 }
993 
994 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
995 {
996 	struct amdgpu_device *adev = ring->adev;
997 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
998 	uint32_t tmp = 0;
999 	unsigned i;
1000 	int r;
1001 
1002 	WREG32(scratch, 0xCAFEDEAD);
1003 	r = amdgpu_ring_alloc(ring, 3);
1004 	if (r)
1005 		return r;
1006 
1007 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1008 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1009 	amdgpu_ring_write(ring, 0xDEADBEEF);
1010 	amdgpu_ring_commit(ring);
1011 
1012 	for (i = 0; i < adev->usec_timeout; i++) {
1013 		tmp = RREG32(scratch);
1014 		if (tmp == 0xDEADBEEF)
1015 			break;
1016 		udelay(1);
1017 	}
1018 
1019 	if (i >= adev->usec_timeout)
1020 		r = -ETIMEDOUT;
1021 	return r;
1022 }
1023 
1024 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1025 {
1026 	struct amdgpu_device *adev = ring->adev;
1027 	struct amdgpu_ib ib;
1028 	struct dma_fence *f = NULL;
1029 
1030 	unsigned index;
1031 	uint64_t gpu_addr;
1032 	uint32_t tmp;
1033 	long r;
1034 
1035 	r = amdgpu_device_wb_get(adev, &index);
1036 	if (r)
1037 		return r;
1038 
1039 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1040 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1041 	memset(&ib, 0, sizeof(ib));
1042 
1043 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1044 	if (r)
1045 		goto err1;
1046 
1047 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1048 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1049 	ib.ptr[2] = lower_32_bits(gpu_addr);
1050 	ib.ptr[3] = upper_32_bits(gpu_addr);
1051 	ib.ptr[4] = 0xDEADBEEF;
1052 	ib.length_dw = 5;
1053 
1054 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1055 	if (r)
1056 		goto err2;
1057 
1058 	r = dma_fence_wait_timeout(f, false, timeout);
1059 	if (r == 0) {
1060 		r = -ETIMEDOUT;
1061 		goto err2;
1062 	} else if (r < 0) {
1063 		goto err2;
1064 	}
1065 
1066 	tmp = adev->wb.wb[index];
1067 	if (tmp == 0xDEADBEEF)
1068 		r = 0;
1069 	else
1070 		r = -EINVAL;
1071 
1072 err2:
1073 	amdgpu_ib_free(adev, &ib, NULL);
1074 	dma_fence_put(f);
1075 err1:
1076 	amdgpu_device_wb_free(adev, index);
1077 	return r;
1078 }
1079 
1080 
1081 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1082 {
1083 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1084 	amdgpu_ucode_release(&adev->gfx.me_fw);
1085 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1086 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1087 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1088 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1089 
1090 	kfree(adev->gfx.rlc.register_list_format);
1091 }
1092 
1093 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1094 {
1095 	adev->gfx.me_fw_write_wait = false;
1096 	adev->gfx.mec_fw_write_wait = false;
1097 
1098 	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1099 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1100 	    (adev->gfx.mec_feature_version < 46) ||
1101 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1102 	    (adev->gfx.pfp_feature_version < 46)))
1103 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1104 
1105 	switch (adev->ip_versions[GC_HWIP][0]) {
1106 	case IP_VERSION(9, 0, 1):
1107 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1108 		    (adev->gfx.me_feature_version >= 42) &&
1109 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1110 		    (adev->gfx.pfp_feature_version >= 42))
1111 			adev->gfx.me_fw_write_wait = true;
1112 
1113 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1114 		    (adev->gfx.mec_feature_version >= 42))
1115 			adev->gfx.mec_fw_write_wait = true;
1116 		break;
1117 	case IP_VERSION(9, 2, 1):
1118 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1119 		    (adev->gfx.me_feature_version >= 44) &&
1120 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1121 		    (adev->gfx.pfp_feature_version >= 44))
1122 			adev->gfx.me_fw_write_wait = true;
1123 
1124 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1125 		    (adev->gfx.mec_feature_version >= 44))
1126 			adev->gfx.mec_fw_write_wait = true;
1127 		break;
1128 	case IP_VERSION(9, 4, 0):
1129 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1130 		    (adev->gfx.me_feature_version >= 44) &&
1131 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1132 		    (adev->gfx.pfp_feature_version >= 44))
1133 			adev->gfx.me_fw_write_wait = true;
1134 
1135 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1136 		    (adev->gfx.mec_feature_version >= 44))
1137 			adev->gfx.mec_fw_write_wait = true;
1138 		break;
1139 	case IP_VERSION(9, 1, 0):
1140 	case IP_VERSION(9, 2, 2):
1141 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1142 		    (adev->gfx.me_feature_version >= 42) &&
1143 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1144 		    (adev->gfx.pfp_feature_version >= 42))
1145 			adev->gfx.me_fw_write_wait = true;
1146 
1147 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1148 		    (adev->gfx.mec_feature_version >= 42))
1149 			adev->gfx.mec_fw_write_wait = true;
1150 		break;
1151 	default:
1152 		adev->gfx.me_fw_write_wait = true;
1153 		adev->gfx.mec_fw_write_wait = true;
1154 		break;
1155 	}
1156 }
1157 
1158 struct amdgpu_gfxoff_quirk {
1159 	u16 chip_vendor;
1160 	u16 chip_device;
1161 	u16 subsys_vendor;
1162 	u16 subsys_device;
1163 	u8 revision;
1164 };
1165 
1166 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1167 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1168 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1169 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1170 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1171 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1172 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1173 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1174 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1175 	/* https://bbs.openkylin.top/t/topic/171497 */
1176 	{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1177 	/* HP 705G4 DM with R5 2400G */
1178 	{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1179 	{ 0, 0, 0, 0, 0 },
1180 };
1181 
1182 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1183 {
1184 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1185 
1186 	while (p && p->chip_device != 0) {
1187 		if (pdev->vendor == p->chip_vendor &&
1188 		    pdev->device == p->chip_device &&
1189 		    pdev->subsystem_vendor == p->subsys_vendor &&
1190 		    pdev->subsystem_device == p->subsys_device &&
1191 		    pdev->revision == p->revision) {
1192 			return true;
1193 		}
1194 		++p;
1195 	}
1196 	return false;
1197 }
1198 
1199 static bool is_raven_kicker(struct amdgpu_device *adev)
1200 {
1201 	if (adev->pm.fw_version >= 0x41e2b)
1202 		return true;
1203 	else
1204 		return false;
1205 }
1206 
1207 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1208 {
1209 	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1210 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1211 	    (adev->gfx.me_feature_version >= 52))
1212 		return true;
1213 	else
1214 		return false;
1215 }
1216 
1217 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1218 {
1219 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1220 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221 
1222 	switch (adev->ip_versions[GC_HWIP][0]) {
1223 	case IP_VERSION(9, 0, 1):
1224 	case IP_VERSION(9, 2, 1):
1225 	case IP_VERSION(9, 4, 0):
1226 		break;
1227 	case IP_VERSION(9, 2, 2):
1228 	case IP_VERSION(9, 1, 0):
1229 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1230 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1231 		    ((!is_raven_kicker(adev) &&
1232 		      adev->gfx.rlc_fw_version < 531) ||
1233 		     (adev->gfx.rlc_feature_version < 1) ||
1234 		     !adev->gfx.rlc.is_rlc_v2_1))
1235 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1236 
1237 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1238 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1239 				AMD_PG_SUPPORT_CP |
1240 				AMD_PG_SUPPORT_RLC_SMU_HS;
1241 		break;
1242 	case IP_VERSION(9, 3, 0):
1243 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1244 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1245 				AMD_PG_SUPPORT_CP |
1246 				AMD_PG_SUPPORT_RLC_SMU_HS;
1247 		break;
1248 	default:
1249 		break;
1250 	}
1251 }
1252 
1253 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1254 					  char *chip_name)
1255 {
1256 	char fw_name[30];
1257 	int err;
1258 
1259 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1260 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1261 	if (err)
1262 		goto out;
1263 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1264 
1265 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1266 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1267 	if (err)
1268 		goto out;
1269 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1270 
1271 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1272 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1273 	if (err)
1274 		goto out;
1275 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1276 
1277 out:
1278 	if (err) {
1279 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1280 		amdgpu_ucode_release(&adev->gfx.me_fw);
1281 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1282 	}
1283 	return err;
1284 }
1285 
1286 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1287 				       char *chip_name)
1288 {
1289 	char fw_name[30];
1290 	int err;
1291 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1292 	uint16_t version_major;
1293 	uint16_t version_minor;
1294 	uint32_t smu_version;
1295 
1296 	/*
1297 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1298 	 * instead of picasso_rlc.bin.
1299 	 * Judgment method:
1300 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1301 	 *          or revision >= 0xD8 && revision <= 0xDF
1302 	 * otherwise is PCO FP5
1303 	 */
1304 	if (!strcmp(chip_name, "picasso") &&
1305 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1306 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1307 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1308 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1309 		(smu_version >= 0x41e2b))
1310 		/**
1311 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1312 		*/
1313 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1314 	else
1315 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1316 	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1317 	if (err)
1318 		goto out;
1319 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1320 
1321 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1322 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1323 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1324 out:
1325 	if (err)
1326 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1327 
1328 	return err;
1329 }
1330 
1331 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1332 {
1333 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1334 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1335 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1336 		return false;
1337 
1338 	return true;
1339 }
1340 
1341 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1342 					      char *chip_name)
1343 {
1344 	char fw_name[30];
1345 	int err;
1346 
1347 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1348 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1349 	else
1350 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1351 
1352 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1353 	if (err)
1354 		goto out;
1355 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1356 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1357 
1358 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1359 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1360 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1361 		else
1362 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1363 
1364 		/* ignore failures to load */
1365 		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1366 		if (!err) {
1367 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1368 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1369 		} else {
1370 			err = 0;
1371 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1372 		}
1373 	} else {
1374 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1375 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1376 	}
1377 
1378 	gfx_v9_0_check_if_need_gfxoff(adev);
1379 	gfx_v9_0_check_fw_write_wait(adev);
1380 
1381 out:
1382 	if (err)
1383 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1384 	return err;
1385 }
1386 
1387 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1388 {
1389 	char ucode_prefix[30];
1390 	int r;
1391 
1392 	DRM_DEBUG("\n");
1393 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1394 
1395 	/* No CPG in Arcturus */
1396 	if (adev->gfx.num_gfx_rings) {
1397 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1398 		if (r)
1399 			return r;
1400 	}
1401 
1402 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1403 	if (r)
1404 		return r;
1405 
1406 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1407 	if (r)
1408 		return r;
1409 
1410 	return r;
1411 }
1412 
1413 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1414 {
1415 	u32 count = 0;
1416 	const struct cs_section_def *sect = NULL;
1417 	const struct cs_extent_def *ext = NULL;
1418 
1419 	/* begin clear state */
1420 	count += 2;
1421 	/* context control state */
1422 	count += 3;
1423 
1424 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1425 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1426 			if (sect->id == SECT_CONTEXT)
1427 				count += 2 + ext->reg_count;
1428 			else
1429 				return 0;
1430 		}
1431 	}
1432 
1433 	/* end clear state */
1434 	count += 2;
1435 	/* clear state */
1436 	count += 2;
1437 
1438 	return count;
1439 }
1440 
1441 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1442 				    volatile u32 *buffer)
1443 {
1444 	u32 count = 0, i;
1445 	const struct cs_section_def *sect = NULL;
1446 	const struct cs_extent_def *ext = NULL;
1447 
1448 	if (adev->gfx.rlc.cs_data == NULL)
1449 		return;
1450 	if (buffer == NULL)
1451 		return;
1452 
1453 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1454 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1455 
1456 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1457 	buffer[count++] = cpu_to_le32(0x80000000);
1458 	buffer[count++] = cpu_to_le32(0x80000000);
1459 
1460 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1461 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1462 			if (sect->id == SECT_CONTEXT) {
1463 				buffer[count++] =
1464 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1465 				buffer[count++] = cpu_to_le32(ext->reg_index -
1466 						PACKET3_SET_CONTEXT_REG_START);
1467 				for (i = 0; i < ext->reg_count; i++)
1468 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1469 			} else {
1470 				return;
1471 			}
1472 		}
1473 	}
1474 
1475 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1476 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1477 
1478 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1479 	buffer[count++] = cpu_to_le32(0);
1480 }
1481 
1482 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1483 {
1484 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1485 	uint32_t pg_always_on_cu_num = 2;
1486 	uint32_t always_on_cu_num;
1487 	uint32_t i, j, k;
1488 	uint32_t mask, cu_bitmap, counter;
1489 
1490 	if (adev->flags & AMD_IS_APU)
1491 		always_on_cu_num = 4;
1492 	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1493 		always_on_cu_num = 8;
1494 	else
1495 		always_on_cu_num = 12;
1496 
1497 	mutex_lock(&adev->grbm_idx_mutex);
1498 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1499 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1500 			mask = 1;
1501 			cu_bitmap = 0;
1502 			counter = 0;
1503 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1504 
1505 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1506 				if (cu_info->bitmap[0][i][j] & mask) {
1507 					if (counter == pg_always_on_cu_num)
1508 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1509 					if (counter < always_on_cu_num)
1510 						cu_bitmap |= mask;
1511 					else
1512 						break;
1513 					counter++;
1514 				}
1515 				mask <<= 1;
1516 			}
1517 
1518 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1519 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1520 		}
1521 	}
1522 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1523 	mutex_unlock(&adev->grbm_idx_mutex);
1524 }
1525 
1526 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1527 {
1528 	uint32_t data;
1529 
1530 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1531 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1532 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1533 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1534 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1535 
1536 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1537 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1538 
1539 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1540 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1541 
1542 	mutex_lock(&adev->grbm_idx_mutex);
1543 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1544 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1545 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1546 
1547 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1548 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1549 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1550 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1551 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1552 
1553 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1554 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1555 	data &= 0x0000FFFF;
1556 	data |= 0x00C00000;
1557 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1558 
1559 	/*
1560 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1561 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1562 	 */
1563 
1564 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1565 	 * but used for RLC_LB_CNTL configuration */
1566 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1567 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1568 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1569 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1570 	mutex_unlock(&adev->grbm_idx_mutex);
1571 
1572 	gfx_v9_0_init_always_on_cu_mask(adev);
1573 }
1574 
1575 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1576 {
1577 	uint32_t data;
1578 
1579 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1580 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1581 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1582 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1583 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1584 
1585 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1586 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1587 
1588 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1589 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1590 
1591 	mutex_lock(&adev->grbm_idx_mutex);
1592 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1593 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1594 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1595 
1596 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1597 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1598 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1599 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1600 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1601 
1602 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1603 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1604 	data &= 0x0000FFFF;
1605 	data |= 0x00C00000;
1606 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1607 
1608 	/*
1609 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1610 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1611 	 */
1612 
1613 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1614 	 * but used for RLC_LB_CNTL configuration */
1615 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1616 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1617 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1618 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1619 	mutex_unlock(&adev->grbm_idx_mutex);
1620 
1621 	gfx_v9_0_init_always_on_cu_mask(adev);
1622 }
1623 
1624 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1625 {
1626 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1627 }
1628 
/*
 * Number of CP jump tables: 5 when gfx_v9_0_load_mec2_fw_bin_support()
 * reports true for this device, 4 otherwise.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1636 
1637 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1638 {
1639 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1640 
1641 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1642 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1643 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1644 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1645 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1646 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1647 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1648 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1649 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1650 }
1651 
1652 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1653 {
1654 	const struct cs_section_def *cs_data;
1655 	int r;
1656 
1657 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1658 
1659 	cs_data = adev->gfx.rlc.cs_data;
1660 
1661 	if (cs_data) {
1662 		/* init clear state block */
1663 		r = amdgpu_gfx_rlc_init_csb(adev);
1664 		if (r)
1665 			return r;
1666 	}
1667 
1668 	if (adev->flags & AMD_IS_APU) {
1669 		/* TODO: double check the cp_table_size for RV */
1670 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1671 		r = amdgpu_gfx_rlc_init_cpt(adev);
1672 		if (r)
1673 			return r;
1674 	}
1675 
1676 	return 0;
1677 }
1678 
1679 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1680 {
1681 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1682 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1683 }
1684 
1685 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1686 {
1687 	int r;
1688 	u32 *hpd;
1689 	const __le32 *fw_data;
1690 	unsigned fw_size;
1691 	u32 *fw;
1692 	size_t mec_hpd_size;
1693 
1694 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1695 
1696 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1697 
1698 	/* take ownership of the relevant compute queues */
1699 	amdgpu_gfx_compute_queue_acquire(adev);
1700 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1701 	if (mec_hpd_size) {
1702 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1703 					      AMDGPU_GEM_DOMAIN_VRAM |
1704 					      AMDGPU_GEM_DOMAIN_GTT,
1705 					      &adev->gfx.mec.hpd_eop_obj,
1706 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1707 					      (void **)&hpd);
1708 		if (r) {
1709 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1710 			gfx_v9_0_mec_fini(adev);
1711 			return r;
1712 		}
1713 
1714 		memset(hpd, 0, mec_hpd_size);
1715 
1716 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1717 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1718 	}
1719 
1720 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1721 
1722 	fw_data = (const __le32 *)
1723 		(adev->gfx.mec_fw->data +
1724 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1725 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1726 
1727 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1728 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1729 				      &adev->gfx.mec.mec_fw_obj,
1730 				      &adev->gfx.mec.mec_fw_gpu_addr,
1731 				      (void **)&fw);
1732 	if (r) {
1733 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1734 		gfx_v9_0_mec_fini(adev);
1735 		return r;
1736 	}
1737 
1738 	memcpy(fw, fw_data, fw_size);
1739 
1740 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1741 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1742 
1743 	return 0;
1744 }
1745 
1746 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1747 {
1748 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1749 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1750 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1751 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1752 		(SQ_IND_INDEX__FORCE_READ_MASK));
1753 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1754 }
1755 
1756 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1757 			   uint32_t wave, uint32_t thread,
1758 			   uint32_t regno, uint32_t num, uint32_t *out)
1759 {
1760 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1761 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1762 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1763 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1764 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1765 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1766 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1767 	while (num--)
1768 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1769 }
1770 
1771 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1772 {
1773 	/* type 1 wave data */
1774 	dst[(*no_fields)++] = 1;
1775 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1776 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1777 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1778 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1779 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1780 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1781 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1782 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1783 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1784 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1785 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1786 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1787 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1788 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1789 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1790 }
1791 
1792 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1793 				     uint32_t wave, uint32_t start,
1794 				     uint32_t size, uint32_t *dst)
1795 {
1796 	wave_read_regs(
1797 		adev, simd, wave, 0,
1798 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1799 }
1800 
1801 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1802 				     uint32_t wave, uint32_t thread,
1803 				     uint32_t start, uint32_t size,
1804 				     uint32_t *dst)
1805 {
1806 	wave_read_regs(
1807 		adev, simd, wave, thread,
1808 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1809 }
1810 
1811 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1812 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1813 {
1814 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1815 }
1816 
1817 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1818         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1819         .select_se_sh = &gfx_v9_0_select_se_sh,
1820         .read_wave_data = &gfx_v9_0_read_wave_data,
1821         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1822         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1823         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1824 };
1825 
1826 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1827 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1828 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1829 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1830 };
1831 
1832 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1833 	.ras_block = {
1834 		.hw_ops = &gfx_v9_0_ras_ops,
1835 	},
1836 };
1837 
1838 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1839 {
1840 	u32 gb_addr_config;
1841 	int err;
1842 
1843 	switch (adev->ip_versions[GC_HWIP][0]) {
1844 	case IP_VERSION(9, 0, 1):
1845 		adev->gfx.config.max_hw_contexts = 8;
1846 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1847 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1848 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1849 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1850 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1851 		break;
1852 	case IP_VERSION(9, 2, 1):
1853 		adev->gfx.config.max_hw_contexts = 8;
1854 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1858 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1859 		DRM_INFO("fix gfx.config for vega12\n");
1860 		break;
1861 	case IP_VERSION(9, 4, 0):
1862 		adev->gfx.ras = &gfx_v9_0_ras;
1863 		adev->gfx.config.max_hw_contexts = 8;
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1868 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1869 		gb_addr_config &= ~0xf3e777ff;
1870 		gb_addr_config |= 0x22014042;
1871 		/* check vbios table if gpu info is not available */
1872 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1873 		if (err)
1874 			return err;
1875 		break;
1876 	case IP_VERSION(9, 2, 2):
1877 	case IP_VERSION(9, 1, 0):
1878 		adev->gfx.config.max_hw_contexts = 8;
1879 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1883 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1884 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1885 		else
1886 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1887 		break;
1888 	case IP_VERSION(9, 4, 1):
1889 		adev->gfx.ras = &gfx_v9_4_ras;
1890 		adev->gfx.config.max_hw_contexts = 8;
1891 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1892 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1893 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1894 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1895 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1896 		gb_addr_config &= ~0xf3e777ff;
1897 		gb_addr_config |= 0x22014042;
1898 		break;
1899 	case IP_VERSION(9, 3, 0):
1900 		adev->gfx.config.max_hw_contexts = 8;
1901 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1904 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1905 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1906 		gb_addr_config &= ~0xf3e777ff;
1907 		gb_addr_config |= 0x22010042;
1908 		break;
1909 	case IP_VERSION(9, 4, 2):
1910 		adev->gfx.ras = &gfx_v9_4_2_ras;
1911 		adev->gfx.config.max_hw_contexts = 8;
1912 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1913 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1914 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1915 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1916 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1917 		gb_addr_config &= ~0xf3e777ff;
1918 		gb_addr_config |= 0x22014042;
1919 		/* check vbios table if gpu info is not available */
1920 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1921 		if (err)
1922 			return err;
1923 		break;
1924 	default:
1925 		BUG();
1926 		break;
1927 	}
1928 
1929 	adev->gfx.config.gb_addr_config = gb_addr_config;
1930 
1931 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1932 			REG_GET_FIELD(
1933 					adev->gfx.config.gb_addr_config,
1934 					GB_ADDR_CONFIG,
1935 					NUM_PIPES);
1936 
1937 	adev->gfx.config.max_tile_pipes =
1938 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1939 
1940 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1941 			REG_GET_FIELD(
1942 					adev->gfx.config.gb_addr_config,
1943 					GB_ADDR_CONFIG,
1944 					NUM_BANKS);
1945 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1946 			REG_GET_FIELD(
1947 					adev->gfx.config.gb_addr_config,
1948 					GB_ADDR_CONFIG,
1949 					MAX_COMPRESSED_FRAGS);
1950 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1951 			REG_GET_FIELD(
1952 					adev->gfx.config.gb_addr_config,
1953 					GB_ADDR_CONFIG,
1954 					NUM_RB_PER_SE);
1955 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1956 			REG_GET_FIELD(
1957 					adev->gfx.config.gb_addr_config,
1958 					GB_ADDR_CONFIG,
1959 					NUM_SHADER_ENGINES);
1960 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1961 			REG_GET_FIELD(
1962 					adev->gfx.config.gb_addr_config,
1963 					GB_ADDR_CONFIG,
1964 					PIPE_INTERLEAVE_SIZE));
1965 
1966 	return 0;
1967 }
1968 
1969 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1970 				      int mec, int pipe, int queue)
1971 {
1972 	unsigned irq_type;
1973 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1974 	unsigned int hw_prio;
1975 
1976 	ring = &adev->gfx.compute_ring[ring_id];
1977 
1978 	/* mec0 is me1 */
1979 	ring->me = mec + 1;
1980 	ring->pipe = pipe;
1981 	ring->queue = queue;
1982 
1983 	ring->ring_obj = NULL;
1984 	ring->use_doorbell = true;
1985 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1986 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1987 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1988 	ring->vm_hub = AMDGPU_GFXHUB(0);
1989 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1990 
1991 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1992 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1993 		+ ring->pipe;
1994 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1995 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1996 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1997 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1998 				hw_prio, NULL);
1999 }
2000 
2001 static int gfx_v9_0_sw_init(void *handle)
2002 {
2003 	int i, j, k, r, ring_id;
2004 	struct amdgpu_ring *ring;
2005 	struct amdgpu_kiq *kiq;
2006 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2007 	unsigned int hw_prio;
2008 
2009 	switch (adev->ip_versions[GC_HWIP][0]) {
2010 	case IP_VERSION(9, 0, 1):
2011 	case IP_VERSION(9, 2, 1):
2012 	case IP_VERSION(9, 4, 0):
2013 	case IP_VERSION(9, 2, 2):
2014 	case IP_VERSION(9, 1, 0):
2015 	case IP_VERSION(9, 4, 1):
2016 	case IP_VERSION(9, 3, 0):
2017 	case IP_VERSION(9, 4, 2):
2018 		adev->gfx.mec.num_mec = 2;
2019 		break;
2020 	default:
2021 		adev->gfx.mec.num_mec = 1;
2022 		break;
2023 	}
2024 
2025 	adev->gfx.mec.num_pipe_per_mec = 4;
2026 	adev->gfx.mec.num_queue_per_pipe = 8;
2027 
2028 	/* EOP Event */
2029 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2030 	if (r)
2031 		return r;
2032 
2033 	/* Privileged reg */
2034 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2035 			      &adev->gfx.priv_reg_irq);
2036 	if (r)
2037 		return r;
2038 
2039 	/* Privileged inst */
2040 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2041 			      &adev->gfx.priv_inst_irq);
2042 	if (r)
2043 		return r;
2044 
2045 	/* ECC error */
2046 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2047 			      &adev->gfx.cp_ecc_error_irq);
2048 	if (r)
2049 		return r;
2050 
2051 	/* FUE error */
2052 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2053 			      &adev->gfx.cp_ecc_error_irq);
2054 	if (r)
2055 		return r;
2056 
2057 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2058 
2059 	if (adev->gfx.rlc.funcs) {
2060 		if (adev->gfx.rlc.funcs->init) {
2061 			r = adev->gfx.rlc.funcs->init(adev);
2062 			if (r) {
2063 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2064 				return r;
2065 			}
2066 		}
2067 	}
2068 
2069 	r = gfx_v9_0_mec_init(adev);
2070 	if (r) {
2071 		DRM_ERROR("Failed to init MEC BOs!\n");
2072 		return r;
2073 	}
2074 
2075 	/* set up the gfx ring */
2076 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2077 		ring = &adev->gfx.gfx_ring[i];
2078 		ring->ring_obj = NULL;
2079 		if (!i)
2080 			sprintf(ring->name, "gfx");
2081 		else
2082 			sprintf(ring->name, "gfx_%d", i);
2083 		ring->use_doorbell = true;
2084 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2085 
2086 		/* disable scheduler on the real ring */
2087 		ring->no_scheduler = true;
2088 		ring->vm_hub = AMDGPU_GFXHUB(0);
2089 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2090 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2091 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2092 		if (r)
2093 			return r;
2094 	}
2095 
2096 	/* set up the software rings */
2097 	if (adev->gfx.num_gfx_rings) {
2098 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2099 			ring = &adev->gfx.sw_gfx_ring[i];
2100 			ring->ring_obj = NULL;
2101 			sprintf(ring->name, amdgpu_sw_ring_name(i));
2102 			ring->use_doorbell = true;
2103 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2104 			ring->is_sw_ring = true;
2105 			hw_prio = amdgpu_sw_ring_priority(i);
2106 			ring->vm_hub = AMDGPU_GFXHUB(0);
2107 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2108 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2109 					     NULL);
2110 			if (r)
2111 				return r;
2112 			ring->wptr = 0;
2113 		}
2114 
2115 		/* init the muxer and add software rings */
2116 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2117 					 GFX9_NUM_SW_GFX_RINGS);
2118 		if (r) {
2119 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2120 			return r;
2121 		}
2122 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2123 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2124 							&adev->gfx.sw_gfx_ring[i]);
2125 			if (r) {
2126 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2127 				return r;
2128 			}
2129 		}
2130 	}
2131 
2132 	/* set up the compute queues - allocate horizontally across pipes */
2133 	ring_id = 0;
2134 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2135 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2136 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2137 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2138 								     k, j))
2139 					continue;
2140 
2141 				r = gfx_v9_0_compute_ring_init(adev,
2142 							       ring_id,
2143 							       i, k, j);
2144 				if (r)
2145 					return r;
2146 
2147 				ring_id++;
2148 			}
2149 		}
2150 	}
2151 
2152 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2153 	if (r) {
2154 		DRM_ERROR("Failed to init KIQ BOs!\n");
2155 		return r;
2156 	}
2157 
2158 	kiq = &adev->gfx.kiq[0];
2159 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
2160 	if (r)
2161 		return r;
2162 
2163 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2164 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2165 	if (r)
2166 		return r;
2167 
2168 	adev->gfx.ce_ram_size = 0x8000;
2169 
2170 	r = gfx_v9_0_gpu_early_init(adev);
2171 	if (r)
2172 		return r;
2173 
2174 	if (amdgpu_gfx_ras_sw_init(adev)) {
2175 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2176 		return -EINVAL;
2177 	}
2178 
2179 	return 0;
2180 }
2181 
2182 
2183 static int gfx_v9_0_sw_fini(void *handle)
2184 {
2185 	int i;
2186 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2187 
2188 	if (adev->gfx.num_gfx_rings) {
2189 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2190 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2191 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2192 	}
2193 
2194 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2195 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2196 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2197 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2198 
2199 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2200 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2201 	amdgpu_gfx_kiq_fini(adev, 0);
2202 
2203 	gfx_v9_0_mec_fini(adev);
2204 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2205 				&adev->gfx.rlc.clear_state_gpu_addr,
2206 				(void **)&adev->gfx.rlc.cs_ptr);
2207 	if (adev->flags & AMD_IS_APU) {
2208 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2209 				&adev->gfx.rlc.cp_table_gpu_addr,
2210 				(void **)&adev->gfx.rlc.cp_table_ptr);
2211 	}
2212 	gfx_v9_0_free_microcode(adev);
2213 
2214 	return 0;
2215 }
2216 
2217 
/* Placeholder: currently does nothing (body is an unimplemented TODO). */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2222 
2223 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2224 			   u32 instance, int xcc_id)
2225 {
2226 	u32 data;
2227 
2228 	if (instance == 0xffffffff)
2229 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2230 	else
2231 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2232 
2233 	if (se_num == 0xffffffff)
2234 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2235 	else
2236 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2237 
2238 	if (sh_num == 0xffffffff)
2239 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2240 	else
2241 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2242 
2243 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2244 }
2245 
2246 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2247 {
2248 	u32 data, mask;
2249 
2250 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2251 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2252 
2253 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2254 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2255 
2256 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2257 					 adev->gfx.config.max_sh_per_se);
2258 
2259 	return (~data) & mask;
2260 }
2261 
2262 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2263 {
2264 	int i, j;
2265 	u32 data;
2266 	u32 active_rbs = 0;
2267 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2268 					adev->gfx.config.max_sh_per_se;
2269 
2270 	mutex_lock(&adev->grbm_idx_mutex);
2271 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2272 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2273 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2274 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2275 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2276 					       rb_bitmap_width_per_sh);
2277 		}
2278 	}
2279 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2280 	mutex_unlock(&adev->grbm_idx_mutex);
2281 
2282 	adev->gfx.config.backend_enable_mask = active_rbs;
2283 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2284 }
2285 
2286 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2287 				uint32_t first_vmid,
2288 				uint32_t last_vmid)
2289 {
2290 	uint32_t data;
2291 	uint32_t trap_config_vmid_mask = 0;
2292 	int i;
2293 
2294 	/* Calculate trap config vmid mask */
2295 	for (i = first_vmid; i < last_vmid; i++)
2296 		trap_config_vmid_mask |= (1 << i);
2297 
2298 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2299 			VMID_SEL, trap_config_vmid_mask);
2300 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2301 			TRAP_EN, 1);
2302 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2303 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2304 
2305 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2306 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2307 }
2308 
2309 #define DEFAULT_SH_MEM_BASES	(0x6000)
2310 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2311 {
2312 	int i;
2313 	uint32_t sh_mem_config;
2314 	uint32_t sh_mem_bases;
2315 
2316 	/*
2317 	 * Configure apertures:
2318 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2319 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2320 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2321 	 */
2322 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2323 
2324 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2325 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2326 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2327 
2328 	mutex_lock(&adev->srbm_mutex);
2329 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2330 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2331 		/* CP and shaders */
2332 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2333 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2334 	}
2335 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2336 	mutex_unlock(&adev->srbm_mutex);
2337 
2338 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2339 	   access. These should be enabled by FW for target VMIDs. */
2340 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2341 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2342 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2343 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2344 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2345 	}
2346 }
2347 
2348 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2349 {
2350 	int vmid;
2351 
2352 	/*
2353 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2354 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2355 	 * the driver can enable them for graphics. VMID0 should maintain
2356 	 * access so that HWS firmware can save/restore entries.
2357 	 */
2358 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2359 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2360 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2361 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2362 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2363 	}
2364 }
2365 
2366 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2367 {
2368 	uint32_t tmp;
2369 
2370 	switch (adev->ip_versions[GC_HWIP][0]) {
2371 	case IP_VERSION(9, 4, 1):
2372 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2373 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2374 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2375 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2376 		break;
2377 	default:
2378 		break;
2379 	}
2380 }
2381 
2382 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2383 {
2384 	u32 tmp;
2385 	int i;
2386 
2387 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2388 
2389 	gfx_v9_0_tiling_mode_table_init(adev);
2390 
2391 	if (adev->gfx.num_gfx_rings)
2392 		gfx_v9_0_setup_rb(adev);
2393 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2394 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2395 
2396 	/* XXX SH_MEM regs */
2397 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2398 	mutex_lock(&adev->srbm_mutex);
2399 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2400 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2401 		/* CP and shaders */
2402 		if (i == 0) {
2403 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2404 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2405 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2406 					    !!adev->gmc.noretry);
2407 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2408 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2409 		} else {
2410 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2411 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2412 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2413 					    !!adev->gmc.noretry);
2414 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2415 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2416 				(adev->gmc.private_aperture_start >> 48));
2417 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2418 				(adev->gmc.shared_aperture_start >> 48));
2419 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2420 		}
2421 	}
2422 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2423 
2424 	mutex_unlock(&adev->srbm_mutex);
2425 
2426 	gfx_v9_0_init_compute_vmid(adev);
2427 	gfx_v9_0_init_gds_vmid(adev);
2428 	gfx_v9_0_init_sq_config(adev);
2429 }
2430 
2431 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2432 {
2433 	u32 i, j, k;
2434 	u32 mask;
2435 
2436 	mutex_lock(&adev->grbm_idx_mutex);
2437 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2438 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2439 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2440 			for (k = 0; k < adev->usec_timeout; k++) {
2441 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2442 					break;
2443 				udelay(1);
2444 			}
2445 			if (k == adev->usec_timeout) {
2446 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2447 						      0xffffffff, 0xffffffff, 0);
2448 				mutex_unlock(&adev->grbm_idx_mutex);
2449 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2450 					 i, j);
2451 				return;
2452 			}
2453 		}
2454 	}
2455 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2456 	mutex_unlock(&adev->grbm_idx_mutex);
2457 
2458 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2459 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2460 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2461 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2462 	for (k = 0; k < adev->usec_timeout; k++) {
2463 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2464 			break;
2465 		udelay(1);
2466 	}
2467 }
2468 
2469 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2470 					       bool enable)
2471 {
2472 	u32 tmp;
2473 
2474 	/* These interrupts should be enabled to drive DS clock */
2475 
2476 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2477 
2478 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2479 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2480 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2481 	if(adev->gfx.num_gfx_rings)
2482 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2483 
2484 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2485 }
2486 
2487 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2488 {
2489 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2490 	/* csib */
2491 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2492 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2493 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2494 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2495 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2496 			adev->gfx.rlc.clear_state_size);
2497 }
2498 
/*
 * Walk the indirect part of an RLC register list format array.
 *
 * Starting at @indirect_offset, the list is a sequence of blocks terminated
 * by 0xFFFFFFFF. Inside a block every entry is three words; the third word
 * is matched against @unique_indirect_regs and stored in the first free
 * (zero) slot when it is not there yet.
 *
 * @register_list_format:         the list being parsed
 * @indirect_offset:              index of the first indirect word
 * @list_size:                    number of words in @register_list_format
 * @unique_indirect_regs:         in/out table of distinct indirect registers;
 *                                a zero entry marks a free slot
 * @unique_indirect_reg_count:    capacity of @unique_indirect_regs
 * @indirect_start_offsets:       out array receiving the start index of each
 *                                block
 * @indirect_start_offsets_count: in/out number of valid start offsets
 * @max_start_offsets_count:      capacity of @indirect_start_offsets
 *
 * If there are more blocks than @max_start_offsets_count, a warning is
 * raised and the extra start offsets are dropped instead of being written
 * past the end of @indirect_start_offsets. Parsing still continues so the
 * unique register table stays complete.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* WARN_ON() does not stop execution: guard the store with it. */
		if (!WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count)) {
			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
		}

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two words of the entry */
			indirect_offset += 2;

			/* look for a matching entry or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			/* table full and register not found */
			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2535 
2536 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2537 {
2538 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2539 	int unique_indirect_reg_count = 0;
2540 
2541 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2542 	int indirect_start_offsets_count = 0;
2543 
2544 	int list_size = 0;
2545 	int i = 0, j = 0;
2546 	u32 tmp = 0;
2547 
2548 	u32 *register_list_format =
2549 		kmemdup(adev->gfx.rlc.register_list_format,
2550 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2551 	if (!register_list_format)
2552 		return -ENOMEM;
2553 
2554 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2555 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2556 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2557 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2558 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2559 				    unique_indirect_regs,
2560 				    unique_indirect_reg_count,
2561 				    indirect_start_offsets,
2562 				    &indirect_start_offsets_count,
2563 				    ARRAY_SIZE(indirect_start_offsets));
2564 
2565 	/* enable auto inc in case it is disabled */
2566 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2567 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2568 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2569 
2570 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2571 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2572 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2573 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2574 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2575 			adev->gfx.rlc.register_restore[i]);
2576 
2577 	/* load indirect register */
2578 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2579 		adev->gfx.rlc.reg_list_format_start);
2580 
2581 	/* direct register portion */
2582 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2583 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2584 			register_list_format[i]);
2585 
2586 	/* indirect register portion */
2587 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2588 		if (register_list_format[i] == 0xFFFFFFFF) {
2589 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2590 			continue;
2591 		}
2592 
2593 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2595 
2596 		for (j = 0; j < unique_indirect_reg_count; j++) {
2597 			if (register_list_format[i] == unique_indirect_regs[j]) {
2598 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2599 				break;
2600 			}
2601 		}
2602 
2603 		BUG_ON(j >= unique_indirect_reg_count);
2604 
2605 		i++;
2606 	}
2607 
2608 	/* set save/restore list size */
2609 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2610 	list_size = list_size >> 1;
2611 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2612 		adev->gfx.rlc.reg_restore_list_size);
2613 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2614 
2615 	/* write the starting offsets to RLC scratch ram */
2616 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2617 		adev->gfx.rlc.starting_offsets_start);
2618 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2619 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2620 		       indirect_start_offsets[i]);
2621 
2622 	/* load unique indirect regs*/
2623 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2624 		if (unique_indirect_regs[i] != 0) {
2625 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2626 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2627 			       unique_indirect_regs[i] & 0x3FFFF);
2628 
2629 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2630 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2631 			       unique_indirect_regs[i] >> 20);
2632 		}
2633 	}
2634 
2635 	kfree(register_list_format);
2636 	return 0;
2637 }
2638 
/* Set the SRM_ENABLE field of RLC_SRM_CNTL to 1. */
static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
}
2643 
2644 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2645 					     bool enable)
2646 {
2647 	uint32_t data = 0;
2648 	uint32_t default_data = 0;
2649 
2650 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2651 	if (enable) {
2652 		/* enable GFXIP control over CGPG */
2653 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2654 		if(default_data != data)
2655 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2656 
2657 		/* update status */
2658 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2659 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2660 		if(default_data != data)
2661 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2662 	} else {
2663 		/* restore GFXIP control over GCPG */
2664 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2665 		if(default_data != data)
2666 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2667 	}
2668 }
2669 
2670 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2671 {
2672 	uint32_t data = 0;
2673 
2674 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2675 			      AMD_PG_SUPPORT_GFX_SMG |
2676 			      AMD_PG_SUPPORT_GFX_DMG)) {
2677 		/* init IDLE_POLL_COUNT = 60 */
2678 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2679 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2680 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2681 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2682 
2683 		/* init RLC PG Delay */
2684 		data = 0;
2685 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2686 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2687 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2688 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2689 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2690 
2691 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2692 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2693 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2694 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2695 
2696 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2697 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2698 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2699 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2700 
2701 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2702 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2703 
2704 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2705 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2706 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2707 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2708 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2709 	}
2710 }
2711 
2712 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2713 						bool enable)
2714 {
2715 	uint32_t data = 0;
2716 	uint32_t default_data = 0;
2717 
2718 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2719 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2720 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2721 			     enable ? 1 : 0);
2722 	if (default_data != data)
2723 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2724 }
2725 
2726 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2727 						bool enable)
2728 {
2729 	uint32_t data = 0;
2730 	uint32_t default_data = 0;
2731 
2732 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2733 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2734 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2735 			     enable ? 1 : 0);
2736 	if(default_data != data)
2737 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2738 }
2739 
2740 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2741 					bool enable)
2742 {
2743 	uint32_t data = 0;
2744 	uint32_t default_data = 0;
2745 
2746 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2747 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2748 			     CP_PG_DISABLE,
2749 			     enable ? 0 : 1);
2750 	if(default_data != data)
2751 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2752 }
2753 
2754 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2755 						bool enable)
2756 {
2757 	uint32_t data, default_data;
2758 
2759 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2760 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2761 			     GFX_POWER_GATING_ENABLE,
2762 			     enable ? 1 : 0);
2763 	if(default_data != data)
2764 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2765 }
2766 
2767 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2768 						bool enable)
2769 {
2770 	uint32_t data, default_data;
2771 
2772 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2773 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2774 			     GFX_PIPELINE_PG_ENABLE,
2775 			     enable ? 1 : 0);
2776 	if(default_data != data)
2777 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2778 
2779 	if (!enable)
2780 		/* read any GFX register to wake up GFX */
2781 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2782 }
2783 
2784 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2785 						       bool enable)
2786 {
2787 	uint32_t data, default_data;
2788 
2789 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2790 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2791 			     STATIC_PER_CU_PG_ENABLE,
2792 			     enable ? 1 : 0);
2793 	if(default_data != data)
2794 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2795 }
2796 
2797 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2798 						bool enable)
2799 {
2800 	uint32_t data, default_data;
2801 
2802 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2803 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2804 			     DYN_PER_CU_PG_ENABLE,
2805 			     enable ? 1 : 0);
2806 	if(default_data != data)
2807 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2808 }
2809 
2810 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2811 {
2812 	gfx_v9_0_init_csb(adev);
2813 
2814 	/*
2815 	 * Rlc save restore list is workable since v2_1.
2816 	 * And it's needed by gfxoff feature.
2817 	 */
2818 	if (adev->gfx.rlc.is_rlc_v2_1) {
2819 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2820 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2821 			gfx_v9_1_init_rlc_save_restore_list(adev);
2822 		gfx_v9_0_enable_save_restore_machine(adev);
2823 	}
2824 
2825 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2826 			      AMD_PG_SUPPORT_GFX_SMG |
2827 			      AMD_PG_SUPPORT_GFX_DMG |
2828 			      AMD_PG_SUPPORT_CP |
2829 			      AMD_PG_SUPPORT_GDS |
2830 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2831 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2832 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2833 		gfx_v9_0_init_gfx_power_gating(adev);
2834 	}
2835 }
2836 
/*
 * Stop the RLC: clear RLC_CNTL.RLC_ENABLE_F32, disable the GUI idle
 * interrupts, then wait for the RLC serdes masters to report idle.
 */
static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
	gfx_v9_0_wait_for_rlc_serdes(adev);
}
2843 
/*
 * Pulse GRBM_SOFT_RESET.SOFT_RESET_RLC: set the bit, wait 50us, clear it,
 * wait another 50us.
 */
static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
2851 
2852 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2853 {
2854 #ifdef AMDGPU_RLC_DEBUG_RETRY
2855 	u32 rlc_ucode_ver;
2856 #endif
2857 
2858 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2859 	udelay(50);
2860 
2861 	/* carrizo do enable cp interrupt after cp inited */
2862 	if (!(adev->flags & AMD_IS_APU)) {
2863 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2864 		udelay(50);
2865 	}
2866 
2867 #ifdef AMDGPU_RLC_DEBUG_RETRY
2868 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2869 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2870 	if(rlc_ucode_ver == 0x108) {
2871 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2872 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2873 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2874 		 * default is 0x9C4 to create a 100us interval */
2875 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2876 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2877 		 * to disable the page fault retry interrupts, default is
2878 		 * 0x100 (256) */
2879 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2880 	}
2881 #endif
2882 }
2883 
2884 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2885 {
2886 	const struct rlc_firmware_header_v2_0 *hdr;
2887 	const __le32 *fw_data;
2888 	unsigned i, fw_size;
2889 
2890 	if (!adev->gfx.rlc_fw)
2891 		return -EINVAL;
2892 
2893 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2894 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2895 
2896 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2897 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2898 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2899 
2900 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2901 			RLCG_UCODE_LOADING_START_ADDRESS);
2902 	for (i = 0; i < fw_size; i++)
2903 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2904 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2905 
2906 	return 0;
2907 }
2908 
2909 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2910 {
2911 	int r;
2912 
2913 	if (amdgpu_sriov_vf(adev)) {
2914 		gfx_v9_0_init_csb(adev);
2915 		return 0;
2916 	}
2917 
2918 	adev->gfx.rlc.funcs->stop(adev);
2919 
2920 	/* disable CG */
2921 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2922 
2923 	gfx_v9_0_init_pg(adev);
2924 
2925 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2926 		/* legacy rlc firmware loading */
2927 		r = gfx_v9_0_rlc_load_microcode(adev);
2928 		if (r)
2929 			return r;
2930 	}
2931 
2932 	switch (adev->ip_versions[GC_HWIP][0]) {
2933 	case IP_VERSION(9, 2, 2):
2934 	case IP_VERSION(9, 1, 0):
2935 		gfx_v9_0_init_lbpw(adev);
2936 		if (amdgpu_lbpw == 0)
2937 			gfx_v9_0_enable_lbpw(adev, false);
2938 		else
2939 			gfx_v9_0_enable_lbpw(adev, true);
2940 		break;
2941 	case IP_VERSION(9, 4, 0):
2942 		gfx_v9_4_init_lbpw(adev);
2943 		if (amdgpu_lbpw > 0)
2944 			gfx_v9_0_enable_lbpw(adev, true);
2945 		else
2946 			gfx_v9_0_enable_lbpw(adev, false);
2947 		break;
2948 	default:
2949 		break;
2950 	}
2951 
2952 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
2953 
2954 	adev->gfx.rlc.funcs->start(adev);
2955 
2956 	return 0;
2957 }
2958 
2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2960 {
2961 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2962 
2963 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2964 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2965 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2966 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2967 	udelay(50);
2968 }
2969 
2970 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2971 {
2972 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2973 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2974 	const struct gfx_firmware_header_v1_0 *me_hdr;
2975 	const __le32 *fw_data;
2976 	unsigned i, fw_size;
2977 
2978 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2979 		return -EINVAL;
2980 
2981 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2982 		adev->gfx.pfp_fw->data;
2983 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2984 		adev->gfx.ce_fw->data;
2985 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2986 		adev->gfx.me_fw->data;
2987 
2988 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2989 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2990 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2991 
2992 	gfx_v9_0_cp_gfx_enable(adev, false);
2993 
2994 	/* PFP */
2995 	fw_data = (const __le32 *)
2996 		(adev->gfx.pfp_fw->data +
2997 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2998 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2999 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3000 	for (i = 0; i < fw_size; i++)
3001 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3002 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3003 
3004 	/* CE */
3005 	fw_data = (const __le32 *)
3006 		(adev->gfx.ce_fw->data +
3007 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3008 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3009 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3010 	for (i = 0; i < fw_size; i++)
3011 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3012 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3013 
3014 	/* ME */
3015 	fw_data = (const __le32 *)
3016 		(adev->gfx.me_fw->data +
3017 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3018 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3019 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3020 	for (i = 0; i < fw_size; i++)
3021 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3022 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3023 
3024 	return 0;
3025 }
3026 
3027 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3028 {
3029 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3030 	const struct cs_section_def *sect = NULL;
3031 	const struct cs_extent_def *ext = NULL;
3032 	int r, i, tmp;
3033 
3034 	/* init the CP */
3035 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3036 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3037 
3038 	gfx_v9_0_cp_gfx_enable(adev, true);
3039 
3040 	/* Now only limit the quirk on the APU gfx9 series and already
3041 	 * confirmed that the APU gfx10/gfx11 needn't such update.
3042 	 */
3043 	if (adev->flags & AMD_IS_APU &&
3044 			adev->in_s3 && !adev->suspend_complete) {
3045 		DRM_INFO(" Will skip the CSB packet resubmit\n");
3046 		return 0;
3047 	}
3048 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3049 	if (r) {
3050 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3051 		return r;
3052 	}
3053 
3054 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3055 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3056 
3057 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3058 	amdgpu_ring_write(ring, 0x80000000);
3059 	amdgpu_ring_write(ring, 0x80000000);
3060 
3061 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3062 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3063 			if (sect->id == SECT_CONTEXT) {
3064 				amdgpu_ring_write(ring,
3065 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3066 					       ext->reg_count));
3067 				amdgpu_ring_write(ring,
3068 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3069 				for (i = 0; i < ext->reg_count; i++)
3070 					amdgpu_ring_write(ring, ext->extent[i]);
3071 			}
3072 		}
3073 	}
3074 
3075 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3076 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3077 
3078 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3079 	amdgpu_ring_write(ring, 0);
3080 
3081 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3082 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3083 	amdgpu_ring_write(ring, 0x8000);
3084 	amdgpu_ring_write(ring, 0x8000);
3085 
3086 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3087 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3088 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3089 	amdgpu_ring_write(ring, tmp);
3090 	amdgpu_ring_write(ring, 0);
3091 
3092 	amdgpu_ring_commit(ring);
3093 
3094 	return 0;
3095 }
3096 
3097 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3098 {
3099 	struct amdgpu_ring *ring;
3100 	u32 tmp;
3101 	u32 rb_bufsz;
3102 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3103 
3104 	/* Set the write pointer delay */
3105 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3106 
3107 	/* set the RB to use vmid 0 */
3108 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3109 
3110 	/* Set ring buffer size */
3111 	ring = &adev->gfx.gfx_ring[0];
3112 	rb_bufsz = order_base_2(ring->ring_size / 8);
3113 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3114 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3115 #ifdef __BIG_ENDIAN
3116 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3117 #endif
3118 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3119 
3120 	/* Initialize the ring buffer's write pointers */
3121 	ring->wptr = 0;
3122 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3123 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3124 
3125 	/* set the wb address wether it's enabled or not */
3126 	rptr_addr = ring->rptr_gpu_addr;
3127 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3128 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3129 
3130 	wptr_gpu_addr = ring->wptr_gpu_addr;
3131 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3132 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3133 
3134 	mdelay(1);
3135 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3136 
3137 	rb_addr = ring->gpu_addr >> 8;
3138 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3139 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3140 
3141 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3142 	if (ring->use_doorbell) {
3143 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3144 				    DOORBELL_OFFSET, ring->doorbell_index);
3145 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3146 				    DOORBELL_EN, 1);
3147 	} else {
3148 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3149 	}
3150 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3151 
3152 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3153 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3154 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3155 
3156 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3157 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3158 
3159 
3160 	/* start the ring */
3161 	gfx_v9_0_cp_gfx_start(adev);
3162 
3163 	return 0;
3164 }
3165 
3166 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3167 {
3168 	if (enable) {
3169 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3170 	} else {
3171 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3172 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3173 		adev->gfx.kiq[0].ring.sched.ready = false;
3174 	}
3175 	udelay(50);
3176 }
3177 
3178 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3179 {
3180 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3181 	const __le32 *fw_data;
3182 	unsigned i;
3183 	u32 tmp;
3184 
3185 	if (!adev->gfx.mec_fw)
3186 		return -EINVAL;
3187 
3188 	gfx_v9_0_cp_compute_enable(adev, false);
3189 
3190 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3191 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3192 
3193 	fw_data = (const __le32 *)
3194 		(adev->gfx.mec_fw->data +
3195 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3196 	tmp = 0;
3197 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3198 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3199 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3200 
3201 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3202 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3203 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3204 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3205 
3206 	/* MEC1 */
3207 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3208 			 mec_hdr->jt_offset);
3209 	for (i = 0; i < mec_hdr->jt_size; i++)
3210 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3211 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3212 
3213 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3214 			adev->gfx.mec_fw_version);
3215 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3216 
3217 	return 0;
3218 }
3219 
3220 /* KIQ functions */
3221 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3222 {
3223 	uint32_t tmp;
3224 	struct amdgpu_device *adev = ring->adev;
3225 
3226 	/* tell RLC which is KIQ queue */
3227 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3228 	tmp &= 0xffffff00;
3229 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3230 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3231 	tmp |= 0x80;
3232 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3233 }
3234 
3235 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3236 {
3237 	struct amdgpu_device *adev = ring->adev;
3238 
3239 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3240 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3241 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3242 			mqd->cp_hqd_queue_priority =
3243 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3244 		}
3245 	}
3246 }
3247 
3248 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3249 {
3250 	struct amdgpu_device *adev = ring->adev;
3251 	struct v9_mqd *mqd = ring->mqd_ptr;
3252 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3253 	uint32_t tmp;
3254 
3255 	mqd->header = 0xC0310800;
3256 	mqd->compute_pipelinestat_enable = 0x00000001;
3257 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3258 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3259 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3260 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3261 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3262 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3263 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3264 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3265 	mqd->compute_misc_reserved = 0x00000003;
3266 
3267 	mqd->dynamic_cu_mask_addr_lo =
3268 		lower_32_bits(ring->mqd_gpu_addr
3269 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3270 	mqd->dynamic_cu_mask_addr_hi =
3271 		upper_32_bits(ring->mqd_gpu_addr
3272 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3273 
3274 	eop_base_addr = ring->eop_gpu_addr >> 8;
3275 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3276 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3277 
3278 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3279 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3280 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3281 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3282 
3283 	mqd->cp_hqd_eop_control = tmp;
3284 
3285 	/* enable doorbell? */
3286 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3287 
3288 	if (ring->use_doorbell) {
3289 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3290 				    DOORBELL_OFFSET, ring->doorbell_index);
3291 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3292 				    DOORBELL_EN, 1);
3293 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3294 				    DOORBELL_SOURCE, 0);
3295 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3296 				    DOORBELL_HIT, 0);
3297 	} else {
3298 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3299 					 DOORBELL_EN, 0);
3300 	}
3301 
3302 	mqd->cp_hqd_pq_doorbell_control = tmp;
3303 
3304 	/* disable the queue if it's active */
3305 	ring->wptr = 0;
3306 	mqd->cp_hqd_dequeue_request = 0;
3307 	mqd->cp_hqd_pq_rptr = 0;
3308 	mqd->cp_hqd_pq_wptr_lo = 0;
3309 	mqd->cp_hqd_pq_wptr_hi = 0;
3310 
3311 	/* set the pointer to the MQD */
3312 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3313 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3314 
3315 	/* set MQD vmid to 0 */
3316 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3317 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3318 	mqd->cp_mqd_control = tmp;
3319 
3320 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3321 	hqd_gpu_addr = ring->gpu_addr >> 8;
3322 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3323 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3324 
3325 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3326 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3327 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3328 			    (order_base_2(ring->ring_size / 4) - 1));
3329 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3330 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3331 #ifdef __BIG_ENDIAN
3332 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3333 #endif
3334 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3335 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3336 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3337 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3338 	mqd->cp_hqd_pq_control = tmp;
3339 
3340 	/* set the wb address whether it's enabled or not */
3341 	wb_gpu_addr = ring->rptr_gpu_addr;
3342 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3343 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3344 		upper_32_bits(wb_gpu_addr) & 0xffff;
3345 
3346 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3347 	wb_gpu_addr = ring->wptr_gpu_addr;
3348 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3349 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3350 
3351 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3352 	ring->wptr = 0;
3353 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3354 
3355 	/* set the vmid for the queue */
3356 	mqd->cp_hqd_vmid = 0;
3357 
3358 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3359 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3360 	mqd->cp_hqd_persistent_state = tmp;
3361 
3362 	/* set MIN_IB_AVAIL_SIZE */
3363 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3364 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3365 	mqd->cp_hqd_ib_control = tmp;
3366 
3367 	/* set static priority for a queue/ring */
3368 	gfx_v9_0_mqd_set_priority(ring, mqd);
3369 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3370 
3371 	/* map_queues packet doesn't need activate the queue,
3372 	 * so only kiq need set this field.
3373 	 */
3374 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3375 		mqd->cp_hqd_active = 1;
3376 
3377 	return 0;
3378 }
3379 
3380 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3381 {
3382 	struct amdgpu_device *adev = ring->adev;
3383 	struct v9_mqd *mqd = ring->mqd_ptr;
3384 	int j;
3385 
3386 	/* disable wptr polling */
3387 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3388 
3389 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3390 	       mqd->cp_hqd_eop_base_addr_lo);
3391 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3392 	       mqd->cp_hqd_eop_base_addr_hi);
3393 
3394 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3395 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3396 	       mqd->cp_hqd_eop_control);
3397 
3398 	/* enable doorbell? */
3399 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3400 	       mqd->cp_hqd_pq_doorbell_control);
3401 
3402 	/* disable the queue if it's active */
3403 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3404 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3405 		for (j = 0; j < adev->usec_timeout; j++) {
3406 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3407 				break;
3408 			udelay(1);
3409 		}
3410 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3411 		       mqd->cp_hqd_dequeue_request);
3412 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3413 		       mqd->cp_hqd_pq_rptr);
3414 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3415 		       mqd->cp_hqd_pq_wptr_lo);
3416 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3417 		       mqd->cp_hqd_pq_wptr_hi);
3418 	}
3419 
3420 	/* set the pointer to the MQD */
3421 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3422 	       mqd->cp_mqd_base_addr_lo);
3423 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3424 	       mqd->cp_mqd_base_addr_hi);
3425 
3426 	/* set MQD vmid to 0 */
3427 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3428 	       mqd->cp_mqd_control);
3429 
3430 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3431 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3432 	       mqd->cp_hqd_pq_base_lo);
3433 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3434 	       mqd->cp_hqd_pq_base_hi);
3435 
3436 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3437 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3438 	       mqd->cp_hqd_pq_control);
3439 
3440 	/* set the wb address whether it's enabled or not */
3441 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3442 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3443 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3444 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3445 
3446 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3447 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3448 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3449 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3450 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3451 
3452 	/* enable the doorbell if requested */
3453 	if (ring->use_doorbell) {
3454 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3455 					(adev->doorbell_index.kiq * 2) << 2);
3456 		/* If GC has entered CGPG, ringing doorbell > first page
3457 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3458 		 * workaround this issue. And this change has to align with firmware
3459 		 * update.
3460 		 */
3461 		if (check_if_enlarge_doorbell_range(adev))
3462 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3463 					(adev->doorbell.size - 4));
3464 		else
3465 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3466 					(adev->doorbell_index.userqueue_end * 2) << 2);
3467 	}
3468 
3469 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3470 	       mqd->cp_hqd_pq_doorbell_control);
3471 
3472 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3473 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3474 	       mqd->cp_hqd_pq_wptr_lo);
3475 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3476 	       mqd->cp_hqd_pq_wptr_hi);
3477 
3478 	/* set the vmid for the queue */
3479 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3480 
3481 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3482 	       mqd->cp_hqd_persistent_state);
3483 
3484 	/* activate the queue */
3485 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3486 	       mqd->cp_hqd_active);
3487 
3488 	if (ring->use_doorbell)
3489 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3490 
3491 	return 0;
3492 }
3493 
3494 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3495 {
3496 	struct amdgpu_device *adev = ring->adev;
3497 	int j;
3498 
3499 	/* disable the queue if it's active */
3500 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3501 
3502 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3503 
3504 		for (j = 0; j < adev->usec_timeout; j++) {
3505 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3506 				break;
3507 			udelay(1);
3508 		}
3509 
3510 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3511 			DRM_DEBUG("KIQ dequeue request failed.\n");
3512 
3513 			/* Manual disable if dequeue request times out */
3514 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3515 		}
3516 
3517 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3518 		      0);
3519 	}
3520 
3521 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3522 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3523 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3524 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3525 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3526 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3527 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3528 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3529 
3530 	return 0;
3531 }
3532 
3533 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3534 {
3535 	struct amdgpu_device *adev = ring->adev;
3536 	struct v9_mqd *mqd = ring->mqd_ptr;
3537 	struct v9_mqd *tmp_mqd;
3538 
3539 	gfx_v9_0_kiq_setting(ring);
3540 
3541 	/* GPU could be in bad state during probe, driver trigger the reset
3542 	 * after load the SMU, in this case , the mqd is not be initialized.
3543 	 * driver need to re-init the mqd.
3544 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3545 	 */
3546 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3547 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3548 		/* for GPU_RESET case , reset MQD to a clean status */
3549 		if (adev->gfx.kiq[0].mqd_backup)
3550 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3551 
3552 		/* reset ring buffer */
3553 		ring->wptr = 0;
3554 		amdgpu_ring_clear_ring(ring);
3555 
3556 		mutex_lock(&adev->srbm_mutex);
3557 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3558 		gfx_v9_0_kiq_init_register(ring);
3559 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3560 		mutex_unlock(&adev->srbm_mutex);
3561 	} else {
3562 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3563 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3564 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3565 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3566 			amdgpu_ring_clear_ring(ring);
3567 		mutex_lock(&adev->srbm_mutex);
3568 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3569 		gfx_v9_0_mqd_init(ring);
3570 		gfx_v9_0_kiq_init_register(ring);
3571 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3572 		mutex_unlock(&adev->srbm_mutex);
3573 
3574 		if (adev->gfx.kiq[0].mqd_backup)
3575 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3576 	}
3577 
3578 	return 0;
3579 }
3580 
3581 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3582 {
3583 	struct amdgpu_device *adev = ring->adev;
3584 	struct v9_mqd *mqd = ring->mqd_ptr;
3585 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3586 	struct v9_mqd *tmp_mqd;
3587 
3588 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3589 	 * is not be initialized before
3590 	 */
3591 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3592 
3593 	if (!tmp_mqd->cp_hqd_pq_control ||
3594 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3595 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3596 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3597 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3598 		mutex_lock(&adev->srbm_mutex);
3599 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3600 		gfx_v9_0_mqd_init(ring);
3601 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3602 		mutex_unlock(&adev->srbm_mutex);
3603 
3604 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3605 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3606 	} else {
3607 		/* restore MQD to a clean status */
3608 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3609 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3610 		/* reset ring buffer */
3611 		ring->wptr = 0;
3612 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3613 		amdgpu_ring_clear_ring(ring);
3614 	}
3615 
3616 	return 0;
3617 }
3618 
3619 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3620 {
3621 	struct amdgpu_ring *ring;
3622 	int r;
3623 
3624 	ring = &adev->gfx.kiq[0].ring;
3625 
3626 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3627 	if (unlikely(r != 0))
3628 		return r;
3629 
3630 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3631 	if (unlikely(r != 0)) {
3632 		amdgpu_bo_unreserve(ring->mqd_obj);
3633 		return r;
3634 	}
3635 
3636 	gfx_v9_0_kiq_init_queue(ring);
3637 	amdgpu_bo_kunmap(ring->mqd_obj);
3638 	ring->mqd_ptr = NULL;
3639 	amdgpu_bo_unreserve(ring->mqd_obj);
3640 	return 0;
3641 }
3642 
3643 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3644 {
3645 	struct amdgpu_ring *ring = NULL;
3646 	int r = 0, i;
3647 
3648 	gfx_v9_0_cp_compute_enable(adev, true);
3649 
3650 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3651 		ring = &adev->gfx.compute_ring[i];
3652 
3653 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3654 		if (unlikely(r != 0))
3655 			goto done;
3656 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3657 		if (!r) {
3658 			r = gfx_v9_0_kcq_init_queue(ring);
3659 			amdgpu_bo_kunmap(ring->mqd_obj);
3660 			ring->mqd_ptr = NULL;
3661 		}
3662 		amdgpu_bo_unreserve(ring->mqd_obj);
3663 		if (r)
3664 			goto done;
3665 	}
3666 
3667 	r = amdgpu_gfx_enable_kcq(adev, 0);
3668 done:
3669 	return r;
3670 }
3671 
3672 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3673 {
3674 	int r, i;
3675 	struct amdgpu_ring *ring;
3676 
3677 	if (!(adev->flags & AMD_IS_APU))
3678 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3679 
3680 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3681 		if (adev->gfx.num_gfx_rings) {
3682 			/* legacy firmware loading */
3683 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3684 			if (r)
3685 				return r;
3686 		}
3687 
3688 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3689 		if (r)
3690 			return r;
3691 	}
3692 
3693 	r = gfx_v9_0_kiq_resume(adev);
3694 	if (r)
3695 		return r;
3696 
3697 	if (adev->gfx.num_gfx_rings) {
3698 		r = gfx_v9_0_cp_gfx_resume(adev);
3699 		if (r)
3700 			return r;
3701 	}
3702 
3703 	r = gfx_v9_0_kcq_resume(adev);
3704 	if (r)
3705 		return r;
3706 
3707 	if (adev->gfx.num_gfx_rings) {
3708 		ring = &adev->gfx.gfx_ring[0];
3709 		r = amdgpu_ring_test_helper(ring);
3710 		if (r)
3711 			return r;
3712 	}
3713 
3714 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3715 		ring = &adev->gfx.compute_ring[i];
3716 		amdgpu_ring_test_helper(ring);
3717 	}
3718 
3719 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3720 
3721 	return 0;
3722 }
3723 
3724 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3725 {
3726 	u32 tmp;
3727 
3728 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3729 	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3730 		return;
3731 
3732 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3733 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3734 				adev->df.hash_status.hash_64k);
3735 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3736 				adev->df.hash_status.hash_2m);
3737 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3738 				adev->df.hash_status.hash_1g);
3739 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3740 }
3741 
3742 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3743 {
3744 	if (adev->gfx.num_gfx_rings)
3745 		gfx_v9_0_cp_gfx_enable(adev, enable);
3746 	gfx_v9_0_cp_compute_enable(adev, enable);
3747 }
3748 
3749 static int gfx_v9_0_hw_init(void *handle)
3750 {
3751 	int r;
3752 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3753 
3754 	if (!amdgpu_sriov_vf(adev))
3755 		gfx_v9_0_init_golden_registers(adev);
3756 
3757 	gfx_v9_0_constants_init(adev);
3758 
3759 	gfx_v9_0_init_tcp_config(adev);
3760 
3761 	r = adev->gfx.rlc.funcs->resume(adev);
3762 	if (r)
3763 		return r;
3764 
3765 	r = gfx_v9_0_cp_resume(adev);
3766 	if (r)
3767 		return r;
3768 
3769 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3770 		gfx_v9_4_2_set_power_brake_sequence(adev);
3771 
3772 	return r;
3773 }
3774 
3775 static int gfx_v9_0_hw_fini(void *handle)
3776 {
3777 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3778 
3779 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3780 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3781 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3782 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3783 
3784 	/* DF freeze and kcq disable will fail */
3785 	if (!amdgpu_ras_intr_triggered())
3786 		/* disable KCQ to avoid CPC touch memory not valid anymore */
3787 		amdgpu_gfx_disable_kcq(adev, 0);
3788 
3789 	if (amdgpu_sriov_vf(adev)) {
3790 		gfx_v9_0_cp_gfx_enable(adev, false);
3791 		/* must disable polling for SRIOV when hw finished, otherwise
3792 		 * CPC engine may still keep fetching WB address which is already
3793 		 * invalid after sw finished and trigger DMAR reading error in
3794 		 * hypervisor side.
3795 		 */
3796 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3797 		return 0;
3798 	}
3799 
3800 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3801 	 * otherwise KIQ is hanging when binding back
3802 	 */
3803 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3804 		mutex_lock(&adev->srbm_mutex);
3805 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3806 				adev->gfx.kiq[0].ring.pipe,
3807 				adev->gfx.kiq[0].ring.queue, 0, 0);
3808 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3809 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3810 		mutex_unlock(&adev->srbm_mutex);
3811 	}
3812 
3813 	gfx_v9_0_cp_enable(adev, false);
3814 
3815 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3816 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3817 	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3818 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3819 		return 0;
3820 	}
3821 
3822 	adev->gfx.rlc.funcs->stop(adev);
3823 	return 0;
3824 }
3825 
/* Suspend is the same as hardware teardown; its result is passed through. */
static int gfx_v9_0_suspend(void *handle)
{
	int ret = gfx_v9_0_hw_fini(handle);

	return ret;
}
3830 
/* Resume is the same as hardware init; its result is passed through. */
static int gfx_v9_0_resume(void *handle)
{
	int ret = gfx_v9_0_hw_init(handle);

	return ret;
}
3835 
3836 static bool gfx_v9_0_is_idle(void *handle)
3837 {
3838 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3839 
3840 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3841 				GRBM_STATUS, GUI_ACTIVE))
3842 		return false;
3843 	else
3844 		return true;
3845 }
3846 
3847 static int gfx_v9_0_wait_for_idle(void *handle)
3848 {
3849 	unsigned i;
3850 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3851 
3852 	for (i = 0; i < adev->usec_timeout; i++) {
3853 		if (gfx_v9_0_is_idle(handle))
3854 			return 0;
3855 		udelay(1);
3856 	}
3857 	return -ETIMEDOUT;
3858 }
3859 
3860 static int gfx_v9_0_soft_reset(void *handle)
3861 {
3862 	u32 grbm_soft_reset = 0;
3863 	u32 tmp;
3864 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3865 
3866 	/* GRBM_STATUS */
3867 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3868 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3869 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3870 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3871 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3872 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3873 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3874 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3875 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3876 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3877 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3878 	}
3879 
3880 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3881 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3882 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3883 	}
3884 
3885 	/* GRBM_STATUS2 */
3886 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3887 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3888 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3889 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3890 
3891 
3892 	if (grbm_soft_reset) {
3893 		/* stop the rlc */
3894 		adev->gfx.rlc.funcs->stop(adev);
3895 
3896 		if (adev->gfx.num_gfx_rings)
3897 			/* Disable GFX parsing/prefetching */
3898 			gfx_v9_0_cp_gfx_enable(adev, false);
3899 
3900 		/* Disable MEC parsing/prefetching */
3901 		gfx_v9_0_cp_compute_enable(adev, false);
3902 
3903 		if (grbm_soft_reset) {
3904 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3905 			tmp |= grbm_soft_reset;
3906 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3907 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3908 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3909 
3910 			udelay(50);
3911 
3912 			tmp &= ~grbm_soft_reset;
3913 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3914 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3915 		}
3916 
3917 		/* Wait a little for things to settle down */
3918 		udelay(50);
3919 	}
3920 	return 0;
3921 }
3922 
3923 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3924 {
3925 	signed long r, cnt = 0;
3926 	unsigned long flags;
3927 	uint32_t seq, reg_val_offs = 0;
3928 	uint64_t value = 0;
3929 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
3930 	struct amdgpu_ring *ring = &kiq->ring;
3931 
3932 	BUG_ON(!ring->funcs->emit_rreg);
3933 
3934 	spin_lock_irqsave(&kiq->ring_lock, flags);
3935 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3936 		pr_err("critical bug! too many kiq readers\n");
3937 		goto failed_unlock;
3938 	}
3939 	amdgpu_ring_alloc(ring, 32);
3940 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3941 	amdgpu_ring_write(ring, 9 |	/* src: register*/
3942 				(5 << 8) |	/* dst: memory */
3943 				(1 << 16) |	/* count sel */
3944 				(1 << 20));	/* write confirm */
3945 	amdgpu_ring_write(ring, 0);
3946 	amdgpu_ring_write(ring, 0);
3947 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3948 				reg_val_offs * 4));
3949 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3950 				reg_val_offs * 4));
3951 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3952 	if (r)
3953 		goto failed_undo;
3954 
3955 	amdgpu_ring_commit(ring);
3956 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3957 
3958 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3959 
3960 	/* don't wait anymore for gpu reset case because this way may
3961 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
3962 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
3963 	 * never return if we keep waiting in virt_kiq_rreg, which cause
3964 	 * gpu_recover() hang there.
3965 	 *
3966 	 * also don't wait anymore for IRQ context
3967 	 * */
3968 	if (r < 1 && (amdgpu_in_reset(adev)))
3969 		goto failed_kiq_read;
3970 
3971 	might_sleep();
3972 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3973 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3974 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3975 	}
3976 
3977 	if (cnt > MAX_KIQ_REG_TRY)
3978 		goto failed_kiq_read;
3979 
3980 	mb();
3981 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
3982 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
3983 	amdgpu_device_wb_free(adev, reg_val_offs);
3984 	return value;
3985 
3986 failed_undo:
3987 	amdgpu_ring_undo(ring);
3988 failed_unlock:
3989 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3990 failed_kiq_read:
3991 	if (reg_val_offs)
3992 		amdgpu_device_wb_free(adev, reg_val_offs);
3993 	pr_err("failed to read gpu clock\n");
3994 	return ~0;
3995 }
3996 
3997 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3998 {
3999 	uint64_t clock, clock_lo, clock_hi, hi_check;
4000 
4001 	switch (adev->ip_versions[GC_HWIP][0]) {
4002 	case IP_VERSION(9, 3, 0):
4003 		preempt_disable();
4004 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4005 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4006 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4007 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4008 		 * roughly every 42 seconds.
4009 		 */
4010 		if (hi_check != clock_hi) {
4011 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4012 			clock_hi = hi_check;
4013 		}
4014 		preempt_enable();
4015 		clock = clock_lo | (clock_hi << 32ULL);
4016 		break;
4017 	default:
4018 		amdgpu_gfx_off_ctrl(adev, false);
4019 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4020 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4021 			clock = gfx_v9_0_kiq_read_clock(adev);
4022 		} else {
4023 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4024 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4025 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4026 		}
4027 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4028 		amdgpu_gfx_off_ctrl(adev, true);
4029 		break;
4030 	}
4031 	return clock;
4032 }
4033 
4034 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4035 					  uint32_t vmid,
4036 					  uint32_t gds_base, uint32_t gds_size,
4037 					  uint32_t gws_base, uint32_t gws_size,
4038 					  uint32_t oa_base, uint32_t oa_size)
4039 {
4040 	struct amdgpu_device *adev = ring->adev;
4041 
4042 	/* GDS Base */
4043 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4044 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4045 				   gds_base);
4046 
4047 	/* GDS Size */
4048 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4049 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4050 				   gds_size);
4051 
4052 	/* GWS */
4053 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4054 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4055 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4056 
4057 	/* OA */
4058 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4059 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4060 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4061 }
4062 
4063 static const u32 vgpr_init_compute_shader[] =
4064 {
4065 	0xb07c0000, 0xbe8000ff,
4066 	0x000000f8, 0xbf110800,
4067 	0x7e000280, 0x7e020280,
4068 	0x7e040280, 0x7e060280,
4069 	0x7e080280, 0x7e0a0280,
4070 	0x7e0c0280, 0x7e0e0280,
4071 	0x80808800, 0xbe803200,
4072 	0xbf84fff5, 0xbf9c0000,
4073 	0xd28c0001, 0x0001007f,
4074 	0xd28d0001, 0x0002027e,
4075 	0x10020288, 0xb8810904,
4076 	0xb7814000, 0xd1196a01,
4077 	0x00000301, 0xbe800087,
4078 	0xbefc00c1, 0xd89c4000,
4079 	0x00020201, 0xd89cc080,
4080 	0x00040401, 0x320202ff,
4081 	0x00000800, 0x80808100,
4082 	0xbf84fff8, 0x7e020280,
4083 	0xbf810000, 0x00000000,
4084 };
4085 
4086 static const u32 sgpr_init_compute_shader[] =
4087 {
4088 	0xb07c0000, 0xbe8000ff,
4089 	0x0000005f, 0xbee50080,
4090 	0xbe812c65, 0xbe822c65,
4091 	0xbe832c65, 0xbe842c65,
4092 	0xbe852c65, 0xb77c0005,
4093 	0x80808500, 0xbf84fff8,
4094 	0xbe800080, 0xbf810000,
4095 };
4096 
4097 static const u32 vgpr_init_compute_shader_arcturus[] = {
4098 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4099 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4100 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4101 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4102 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4103 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4104 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4105 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4106 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4107 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4108 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4109 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4110 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4111 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4112 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4113 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4114 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4115 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4116 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4117 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4118 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4119 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4120 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4121 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4122 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4123 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4124 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4125 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4126 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4127 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4128 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4129 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4130 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4131 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4132 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4133 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4134 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4135 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4136 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4137 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4138 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4139 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4140 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4141 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4142 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4143 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4144 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4145 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4146 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4147 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4148 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4149 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4150 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4151 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4152 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4153 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4154 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4155 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4156 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4157 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4158 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4159 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4160 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4161 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4162 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4163 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4164 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4165 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4166 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4167 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4168 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4169 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4170 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4171 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4172 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4173 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4174 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4175 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4176 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4177 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4178 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4179 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4180 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4181 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4182 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4183 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4184 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4185 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4186 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4187 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4188 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4189 	0xbf84fff8, 0xbf810000,
4190 };
4191 
/*
 * When the register arrays below are changed, please also update
 * gpr_reg_size and sec_ded_counter_reg_size in
 * gfx_v9_0_do_edc_gpr_workarounds() so that all gfx9 ASICs stay covered.
 */
4195 static const struct soc15_reg_entry vgpr_init_regs[] = {
4196    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4197    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4198    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4199    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4200    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4201    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4202    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4203    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4204    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4205    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4206    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4207    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4208    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4209    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4210 };
4211 
4212 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4213    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4214    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4215    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4216    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4217    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4218    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4219    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4220    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4221    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4222    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4223    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4224    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4225    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4226    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4227 };
4228 
4229 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4230    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4231    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4232    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4233    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4234    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4235    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4236    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4237    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4238    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4239    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4240    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4241    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4242    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4243    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4244 };
4245 
4246 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4247    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4248    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4249    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4250    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4251    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4252    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4253    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4254    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4255    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4256    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4257    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4258    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4259    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4260    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4261 };
4262 
4263 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4264    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4265    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4266    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4267    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4268    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4269    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4270    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4271    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4272    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4273    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4274    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4275    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4276    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4277    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4278    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4279    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4280    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4281    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4282    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4283    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4284    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4285    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4286    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4287    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4288    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4289    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4290    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4291    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4292    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4293    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4294    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4295    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4296    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4297 };
4298 
4299 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4300 {
4301 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4302 	int i, r;
4303 
4304 	/* only support when RAS is enabled */
4305 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4306 		return 0;
4307 
4308 	r = amdgpu_ring_alloc(ring, 7);
4309 	if (r) {
4310 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4311 			ring->name, r);
4312 		return r;
4313 	}
4314 
4315 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4316 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4317 
4318 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4319 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4320 				PACKET3_DMA_DATA_DST_SEL(1) |
4321 				PACKET3_DMA_DATA_SRC_SEL(2) |
4322 				PACKET3_DMA_DATA_ENGINE(0)));
4323 	amdgpu_ring_write(ring, 0);
4324 	amdgpu_ring_write(ring, 0);
4325 	amdgpu_ring_write(ring, 0);
4326 	amdgpu_ring_write(ring, 0);
4327 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4328 				adev->gds.gds_size);
4329 
4330 	amdgpu_ring_commit(ring);
4331 
4332 	for (i = 0; i < adev->usec_timeout; i++) {
4333 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4334 			break;
4335 		udelay(1);
4336 	}
4337 
4338 	if (i >= adev->usec_timeout)
4339 		r = -ETIMEDOUT;
4340 
4341 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4342 
4343 	return r;
4344 }
4345 
4346 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4347 {
4348 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4349 	struct amdgpu_ib ib;
4350 	struct dma_fence *f = NULL;
4351 	int r, i;
4352 	unsigned total_size, vgpr_offset, sgpr_offset;
4353 	u64 gpu_addr;
4354 
4355 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4356 						adev->gfx.config.max_cu_per_sh *
4357 						adev->gfx.config.max_sh_per_se;
4358 	int sgpr_work_group_size = 5;
4359 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4360 	int vgpr_init_shader_size;
4361 	const u32 *vgpr_init_shader_ptr;
4362 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4363 
4364 	/* only support when RAS is enabled */
4365 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4366 		return 0;
4367 
4368 	/* bail if the compute ring is not ready */
4369 	if (!ring->sched.ready)
4370 		return 0;
4371 
4372 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4373 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4374 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4375 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4376 	} else {
4377 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4378 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4379 		vgpr_init_regs_ptr = vgpr_init_regs;
4380 	}
4381 
4382 	total_size =
4383 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4384 	total_size +=
4385 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4386 	total_size +=
4387 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4388 	total_size = ALIGN(total_size, 256);
4389 	vgpr_offset = total_size;
4390 	total_size += ALIGN(vgpr_init_shader_size, 256);
4391 	sgpr_offset = total_size;
4392 	total_size += sizeof(sgpr_init_compute_shader);
4393 
4394 	/* allocate an indirect buffer to put the commands in */
4395 	memset(&ib, 0, sizeof(ib));
4396 	r = amdgpu_ib_get(adev, NULL, total_size,
4397 					AMDGPU_IB_POOL_DIRECT, &ib);
4398 	if (r) {
4399 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4400 		return r;
4401 	}
4402 
4403 	/* load the compute shaders */
4404 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4405 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4406 
4407 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4408 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4409 
4410 	/* init the ib length to 0 */
4411 	ib.length_dw = 0;
4412 
4413 	/* VGPR */
4414 	/* write the register state for the compute dispatch */
4415 	for (i = 0; i < gpr_reg_size; i++) {
4416 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4417 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4418 								- PACKET3_SET_SH_REG_START;
4419 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4420 	}
4421 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4422 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4423 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4424 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4425 							- PACKET3_SET_SH_REG_START;
4426 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4427 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4428 
4429 	/* write dispatch packet */
4430 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4431 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4432 	ib.ptr[ib.length_dw++] = 1; /* y */
4433 	ib.ptr[ib.length_dw++] = 1; /* z */
4434 	ib.ptr[ib.length_dw++] =
4435 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4436 
4437 	/* write CS partial flush packet */
4438 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4439 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4440 
4441 	/* SGPR1 */
4442 	/* write the register state for the compute dispatch */
4443 	for (i = 0; i < gpr_reg_size; i++) {
4444 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4445 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4446 								- PACKET3_SET_SH_REG_START;
4447 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4448 	}
4449 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4450 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4451 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4452 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4453 							- PACKET3_SET_SH_REG_START;
4454 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4455 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4456 
4457 	/* write dispatch packet */
4458 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4459 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4460 	ib.ptr[ib.length_dw++] = 1; /* y */
4461 	ib.ptr[ib.length_dw++] = 1; /* z */
4462 	ib.ptr[ib.length_dw++] =
4463 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4464 
4465 	/* write CS partial flush packet */
4466 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4467 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4468 
4469 	/* SGPR2 */
4470 	/* write the register state for the compute dispatch */
4471 	for (i = 0; i < gpr_reg_size; i++) {
4472 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4473 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4474 								- PACKET3_SET_SH_REG_START;
4475 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4476 	}
4477 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4478 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4479 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4480 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4481 							- PACKET3_SET_SH_REG_START;
4482 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4483 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4484 
4485 	/* write dispatch packet */
4486 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4487 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4488 	ib.ptr[ib.length_dw++] = 1; /* y */
4489 	ib.ptr[ib.length_dw++] = 1; /* z */
4490 	ib.ptr[ib.length_dw++] =
4491 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4492 
4493 	/* write CS partial flush packet */
4494 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4495 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4496 
4497 	/* shedule the ib on the ring */
4498 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4499 	if (r) {
4500 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4501 		goto fail;
4502 	}
4503 
4504 	/* wait for the GPU to finish processing the IB */
4505 	r = dma_fence_wait(f, false);
4506 	if (r) {
4507 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4508 		goto fail;
4509 	}
4510 
4511 fail:
4512 	amdgpu_ib_free(adev, &ib, NULL);
4513 	dma_fence_put(f);
4514 
4515 	return r;
4516 }
4517 
4518 static int gfx_v9_0_early_init(void *handle)
4519 {
4520 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4521 
4522 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4523 
4524 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4525 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4526 		adev->gfx.num_gfx_rings = 0;
4527 	else
4528 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4529 	adev->gfx.xcc_mask = 1;
4530 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4531 					  AMDGPU_MAX_COMPUTE_RINGS);
4532 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4533 	gfx_v9_0_set_ring_funcs(adev);
4534 	gfx_v9_0_set_irq_funcs(adev);
4535 	gfx_v9_0_set_gds_init(adev);
4536 	gfx_v9_0_set_rlc_funcs(adev);
4537 
4538 	/* init rlcg reg access ctrl */
4539 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4540 
4541 	return gfx_v9_0_init_microcode(adev);
4542 }
4543 
4544 static int gfx_v9_0_ecc_late_init(void *handle)
4545 {
4546 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4547 	int r;
4548 
4549 	/*
4550 	 * Temp workaround to fix the issue that CP firmware fails to
4551 	 * update read pointer when CPDMA is writing clearing operation
4552 	 * to GDS in suspend/resume sequence on several cards. So just
4553 	 * limit this operation in cold boot sequence.
4554 	 */
4555 	if ((!adev->in_suspend) &&
4556 	    (adev->gds.gds_size)) {
4557 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4558 		if (r)
4559 			return r;
4560 	}
4561 
4562 	/* requires IBs so do in late init after IB pool is initialized */
4563 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4564 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4565 	else
4566 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4567 
4568 	if (r)
4569 		return r;
4570 
4571 	if (adev->gfx.ras &&
4572 	    adev->gfx.ras->enable_watchdog_timer)
4573 		adev->gfx.ras->enable_watchdog_timer(adev);
4574 
4575 	return 0;
4576 }
4577 
4578 static int gfx_v9_0_late_init(void *handle)
4579 {
4580 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4581 	int r;
4582 
4583 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4584 	if (r)
4585 		return r;
4586 
4587 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4588 	if (r)
4589 		return r;
4590 
4591 	r = gfx_v9_0_ecc_late_init(handle);
4592 	if (r)
4593 		return r;
4594 
4595 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4596 		gfx_v9_4_2_debug_trap_config_init(adev,
4597 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4598 	else
4599 		gfx_v9_0_debug_trap_config_init(adev,
4600 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4601 
4602 	return 0;
4603 }
4604 
4605 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4606 {
4607 	uint32_t rlc_setting;
4608 
4609 	/* if RLC is not enabled, do nothing */
4610 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4611 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4612 		return false;
4613 
4614 	return true;
4615 }
4616 
4617 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4618 {
4619 	uint32_t data;
4620 	unsigned i;
4621 
4622 	data = RLC_SAFE_MODE__CMD_MASK;
4623 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4624 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4625 
4626 	/* wait for RLC_SAFE_MODE */
4627 	for (i = 0; i < adev->usec_timeout; i++) {
4628 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4629 			break;
4630 		udelay(1);
4631 	}
4632 }
4633 
4634 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4635 {
4636 	uint32_t data;
4637 
4638 	data = RLC_SAFE_MODE__CMD_MASK;
4639 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4640 }
4641 
4642 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4643 						bool enable)
4644 {
4645 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4646 
4647 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4648 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4649 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4650 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4651 	} else {
4652 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4653 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4654 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4655 	}
4656 
4657 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4658 }
4659 
4660 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4661 						bool enable)
4662 {
4663 	/* TODO: double check if we need to perform under safe mode */
4664 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4665 
4666 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4667 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4668 	else
4669 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4670 
4671 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4672 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4673 	else
4674 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4675 
4676 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4677 }
4678 
4679 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4680 						      bool enable)
4681 {
4682 	uint32_t data, def;
4683 
4684 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4685 
4686 	/* It is disabled by HW by default */
4687 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4688 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4689 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4690 
4691 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4692 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4693 
4694 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4695 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4696 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4697 
4698 		/* only for Vega10 & Raven1 */
4699 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4700 
4701 		if (def != data)
4702 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4703 
4704 		/* MGLS is a global flag to control all MGLS in GFX */
4705 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4706 			/* 2 - RLC memory Light sleep */
4707 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4708 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4709 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4710 				if (def != data)
4711 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4712 			}
4713 			/* 3 - CP memory Light sleep */
4714 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4715 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4716 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4717 				if (def != data)
4718 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4719 			}
4720 		}
4721 	} else {
4722 		/* 1 - MGCG_OVERRIDE */
4723 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4724 
4725 		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4726 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4727 
4728 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4729 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4730 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4731 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4732 
4733 		if (def != data)
4734 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4735 
4736 		/* 2 - disable MGLS in RLC */
4737 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4738 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4739 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4740 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4741 		}
4742 
4743 		/* 3 - disable MGLS in CP */
4744 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4745 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4746 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4747 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4748 		}
4749 	}
4750 
4751 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4752 }
4753 
4754 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4755 					   bool enable)
4756 {
4757 	uint32_t data, def;
4758 
4759 	if (!adev->gfx.num_gfx_rings)
4760 		return;
4761 
4762 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4763 
4764 	/* Enable 3D CGCG/CGLS */
4765 	if (enable) {
4766 		/* write cmd to clear cgcg/cgls ov */
4767 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4768 		/* unset CGCG override */
4769 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4770 		/* update CGCG and CGLS override bits */
4771 		if (def != data)
4772 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4773 
4774 		/* enable 3Dcgcg FSM(0x0000363f) */
4775 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4776 
4777 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4778 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4779 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4780 		else
4781 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4782 
4783 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4784 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4785 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4786 		if (def != data)
4787 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4788 
4789 		/* set IDLE_POLL_COUNT(0x00900100) */
4790 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4791 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4792 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4793 		if (def != data)
4794 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4795 	} else {
4796 		/* Disable CGCG/CGLS */
4797 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4798 		/* disable cgcg, cgls should be disabled */
4799 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4800 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4801 		/* disable cgcg and cgls in FSM */
4802 		if (def != data)
4803 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4804 	}
4805 
4806 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4807 }
4808 
4809 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4810 						      bool enable)
4811 {
4812 	uint32_t def, data;
4813 
4814 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4815 
4816 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4817 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4818 		/* unset CGCG override */
4819 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4820 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4821 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4822 		else
4823 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4824 		/* update CGCG and CGLS override bits */
4825 		if (def != data)
4826 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4827 
4828 		/* enable cgcg FSM(0x0000363F) */
4829 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4830 
4831 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4832 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4833 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4834 		else
4835 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4836 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4837 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4838 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4839 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4840 		if (def != data)
4841 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4842 
4843 		/* set IDLE_POLL_COUNT(0x00900100) */
4844 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4845 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4846 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4847 		if (def != data)
4848 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4849 	} else {
4850 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4851 		/* reset CGCG/CGLS bits */
4852 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4853 		/* disable cgcg and cgls in FSM */
4854 		if (def != data)
4855 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4856 	}
4857 
4858 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4859 }
4860 
/*
 * Apply the requested clock gating state to the three GFX gating stages.
 *
 * @adev:   amdgpu device
 * @enable: true to enable gating, false to disable it
 *
 * When enabling, medium grain (MGCG/MGLS) is programmed first, then 3D
 * CGCG/CGLS, then CGCG/CGLS. When disabling, the exact reverse order is used.
 *
 * Always returns 0.
 */
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (!enable) {
		/* CGCG/CGLS should be disabled before MGCG/MGLS */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
		return 0;
	}

	/* CGCG/CGLS should be enabled after MGCG/MGLS */
	gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
	gfx_v9_0_update_3d_clock_gating(adev, enable);
	gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);

	return 0;
}
4885 
4886 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
4887 					      unsigned int vmid)
4888 {
4889 	u32 reg, data;
4890 
4891 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4892 	if (amdgpu_sriov_is_pp_one_vf(adev))
4893 		data = RREG32_NO_KIQ(reg);
4894 	else
4895 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4896 
4897 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4898 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4899 
4900 	if (amdgpu_sriov_is_pp_one_vf(adev))
4901 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4902 	else
4903 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4904 }
4905 
/*
 * Program the SPM VMID via gfx_v9_0_update_spm_vmid_internal().
 *
 * The register update is wrapped in amdgpu_gfx_off_ctrl(adev, false) before
 * and amdgpu_gfx_off_ctrl(adev, true) after the write.
 */
static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
{
	amdgpu_gfx_off_ctrl(adev, false);

	gfx_v9_0_update_spm_vmid_internal(adev, vmid);

	amdgpu_gfx_off_ctrl(adev, true);
}
4914 
4915 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4916 					uint32_t offset,
4917 					struct soc15_reg_rlcg *entries, int arr_size)
4918 {
4919 	int i;
4920 	uint32_t reg;
4921 
4922 	if (!entries)
4923 		return false;
4924 
4925 	for (i = 0; i < arr_size; i++) {
4926 		const struct soc15_reg_rlcg *entry;
4927 
4928 		entry = &entries[i];
4929 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4930 		if (offset == reg)
4931 			return true;
4932 	}
4933 
4934 	return false;
4935 }
4936 
/*
 * Report whether @offset matches an entry of the rlcg_access_gc_9_0 table.
 *
 * The table is handed to gfx_v9_0_check_rlcg_range() through a (void *) cast
 * together with its element count.
 */
static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	return gfx_v9_0_check_rlcg_range(adev, offset,
					(void *)rlcg_access_gc_9_0,
					ARRAY_SIZE(rlcg_access_gc_9_0));
}
4943 
/*
 * RLC callback table for gfx v9: binds each amdgpu_rlc_funcs hook to its
 * gfx_v9_0 implementation defined in this file.
 */
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
	.set_safe_mode = gfx_v9_0_set_safe_mode,
	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
	.init = gfx_v9_0_rlc_init,
	.get_csb_size = gfx_v9_0_get_csb_size,
	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
	.resume = gfx_v9_0_rlc_resume,
	.stop = gfx_v9_0_rlc_stop,
	.reset = gfx_v9_0_rlc_reset,
	.start = gfx_v9_0_rlc_start,
	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
4959 
4960 static int gfx_v9_0_set_powergating_state(void *handle,
4961 					  enum amd_powergating_state state)
4962 {
4963 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4964 	bool enable = (state == AMD_PG_STATE_GATE);
4965 
4966 	switch (adev->ip_versions[GC_HWIP][0]) {
4967 	case IP_VERSION(9, 2, 2):
4968 	case IP_VERSION(9, 1, 0):
4969 	case IP_VERSION(9, 3, 0):
4970 		if (!enable)
4971 			amdgpu_gfx_off_ctrl(adev, false);
4972 
4973 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4974 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4975 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4976 		} else {
4977 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4978 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4979 		}
4980 
4981 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4982 			gfx_v9_0_enable_cp_power_gating(adev, true);
4983 		else
4984 			gfx_v9_0_enable_cp_power_gating(adev, false);
4985 
4986 		/* update gfx cgpg state */
4987 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4988 
4989 		/* update mgcg state */
4990 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4991 
4992 		if (enable)
4993 			amdgpu_gfx_off_ctrl(adev, true);
4994 		break;
4995 	case IP_VERSION(9, 2, 1):
4996 		amdgpu_gfx_off_ctrl(adev, enable);
4997 		break;
4998 	default:
4999 		break;
5000 	}
5001 
5002 	return 0;
5003 }
5004 
5005 static int gfx_v9_0_set_clockgating_state(void *handle,
5006 					  enum amd_clockgating_state state)
5007 {
5008 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5009 
5010 	if (amdgpu_sriov_vf(adev))
5011 		return 0;
5012 
5013 	switch (adev->ip_versions[GC_HWIP][0]) {
5014 	case IP_VERSION(9, 0, 1):
5015 	case IP_VERSION(9, 2, 1):
5016 	case IP_VERSION(9, 4, 0):
5017 	case IP_VERSION(9, 2, 2):
5018 	case IP_VERSION(9, 1, 0):
5019 	case IP_VERSION(9, 4, 1):
5020 	case IP_VERSION(9, 3, 0):
5021 	case IP_VERSION(9, 4, 2):
5022 		gfx_v9_0_update_gfx_clock_gating(adev,
5023 						 state == AMD_CG_STATE_GATE);
5024 		break;
5025 	default:
5026 		break;
5027 	}
5028 	return 0;
5029 }
5030 
5031 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5032 {
5033 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5034 	int data;
5035 
5036 	if (amdgpu_sriov_vf(adev))
5037 		*flags = 0;
5038 
5039 	/* AMD_CG_SUPPORT_GFX_MGCG */
5040 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5041 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5042 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5043 
5044 	/* AMD_CG_SUPPORT_GFX_CGCG */
5045 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5046 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5047 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5048 
5049 	/* AMD_CG_SUPPORT_GFX_CGLS */
5050 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5051 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5052 
5053 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5054 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5055 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5056 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5057 
5058 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5059 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5060 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5061 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5062 
5063 	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5064 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5065 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5066 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5067 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5068 
5069 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5070 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5071 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5072 	}
5073 }
5074 
/* Return the gfx ring read pointer as stored at ring->rptr_cpu_addr. */
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
}
5079 
5080 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5081 {
5082 	struct amdgpu_device *adev = ring->adev;
5083 	u64 wptr;
5084 
5085 	/* XXX check if swapping is necessary on BE */
5086 	if (ring->use_doorbell) {
5087 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5088 	} else {
5089 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5090 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5091 	}
5092 
5093 	return wptr;
5094 }
5095 
5096 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5097 {
5098 	struct amdgpu_device *adev = ring->adev;
5099 
5100 	if (ring->use_doorbell) {
5101 		/* XXX check if swapping is necessary on BE */
5102 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5103 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5104 	} else {
5105 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5106 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5107 	}
5108 }
5109 
5110 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5111 {
5112 	struct amdgpu_device *adev = ring->adev;
5113 	u32 ref_and_mask, reg_mem_engine;
5114 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5115 
5116 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5117 		switch (ring->me) {
5118 		case 1:
5119 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5120 			break;
5121 		case 2:
5122 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5123 			break;
5124 		default:
5125 			return;
5126 		}
5127 		reg_mem_engine = 0;
5128 	} else {
5129 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5130 		reg_mem_engine = 1; /* pfp */
5131 	}
5132 
5133 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5134 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5135 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5136 			      ref_and_mask, ref_and_mask, 0x20);
5137 }
5138 
5139 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5140 					struct amdgpu_job *job,
5141 					struct amdgpu_ib *ib,
5142 					uint32_t flags)
5143 {
5144 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5145 	u32 header, control = 0;
5146 
5147 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5148 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5149 	else
5150 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5151 
5152 	control |= ib->length_dw | (vmid << 24);
5153 
5154 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5155 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5156 
5157 		if (flags & AMDGPU_IB_PREEMPTED)
5158 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5159 
5160 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5161 			gfx_v9_0_ring_emit_de_meta(ring,
5162 						   (!amdgpu_sriov_vf(ring->adev) &&
5163 						   flags & AMDGPU_IB_PREEMPTED) ?
5164 						   true : false,
5165 						   job->gds_size > 0 && job->gds_base != 0);
5166 	}
5167 
5168 	amdgpu_ring_write(ring, header);
5169 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5170 	amdgpu_ring_write(ring,
5171 #ifdef __BIG_ENDIAN
5172 		(2 << 0) |
5173 #endif
5174 		lower_32_bits(ib->gpu_addr));
5175 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5176 	amdgpu_ring_ib_on_emit_cntl(ring);
5177 	amdgpu_ring_write(ring, control);
5178 }
5179 
5180 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5181 				     unsigned offset)
5182 {
5183 	u32 control = ring->ring[offset];
5184 
5185 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5186 	ring->ring[offset] = control;
5187 }
5188 
5189 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5190 					unsigned offset)
5191 {
5192 	struct amdgpu_device *adev = ring->adev;
5193 	void *ce_payload_cpu_addr;
5194 	uint64_t payload_offset, payload_size;
5195 
5196 	payload_size = sizeof(struct v9_ce_ib_state);
5197 
5198 	if (ring->is_mes_queue) {
5199 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5200 					  gfx[0].gfx_meta_data) +
5201 			offsetof(struct v9_gfx_meta_data, ce_payload);
5202 		ce_payload_cpu_addr =
5203 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5204 	} else {
5205 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5206 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5207 	}
5208 
5209 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5210 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5211 	} else {
5212 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5213 		       (ring->buf_mask + 1 - offset) << 2);
5214 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5215 		memcpy((void *)&ring->ring[0],
5216 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5217 		       payload_size);
5218 	}
5219 }
5220 
5221 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5222 					unsigned offset)
5223 {
5224 	struct amdgpu_device *adev = ring->adev;
5225 	void *de_payload_cpu_addr;
5226 	uint64_t payload_offset, payload_size;
5227 
5228 	payload_size = sizeof(struct v9_de_ib_state);
5229 
5230 	if (ring->is_mes_queue) {
5231 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5232 					  gfx[0].gfx_meta_data) +
5233 			offsetof(struct v9_gfx_meta_data, de_payload);
5234 		de_payload_cpu_addr =
5235 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5236 	} else {
5237 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5238 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5239 	}
5240 
5241 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5242 		IB_COMPLETION_STATUS_PREEMPTED;
5243 
5244 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5245 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5246 	} else {
5247 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5248 		       (ring->buf_mask + 1 - offset) << 2);
5249 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5250 		memcpy((void *)&ring->ring[0],
5251 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5252 		       payload_size);
5253 	}
5254 }
5255 
5256 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5257 					  struct amdgpu_job *job,
5258 					  struct amdgpu_ib *ib,
5259 					  uint32_t flags)
5260 {
5261 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5262 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5263 
5264 	/* Currently, there is a high possibility to get wave ID mismatch
5265 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5266 	 * different wave IDs than the GDS expects. This situation happens
5267 	 * randomly when at least 5 compute pipes use GDS ordered append.
5268 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5269 	 * Those are probably bugs somewhere else in the kernel driver.
5270 	 *
5271 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5272 	 * GDS to 0 for this ring (me/pipe).
5273 	 */
5274 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5275 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5276 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5277 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5278 	}
5279 
5280 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5281 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5282 	amdgpu_ring_write(ring,
5283 #ifdef __BIG_ENDIAN
5284 				(2 << 0) |
5285 #endif
5286 				lower_32_bits(ib->gpu_addr));
5287 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5288 	amdgpu_ring_write(ring, control);
5289 }
5290 
5291 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5292 				     u64 seq, unsigned flags)
5293 {
5294 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5295 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5296 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5297 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5298 	uint32_t dw2 = 0;
5299 
5300 	/* RELEASE_MEM - flush caches, send int */
5301 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5302 
5303 	if (writeback) {
5304 		dw2 = EOP_TC_NC_ACTION_EN;
5305 	} else {
5306 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5307 				EOP_TC_MD_ACTION_EN;
5308 	}
5309 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5310 				EVENT_INDEX(5);
5311 	if (exec)
5312 		dw2 |= EOP_EXEC;
5313 
5314 	amdgpu_ring_write(ring, dw2);
5315 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5316 
5317 	/*
5318 	 * the address should be Qword aligned if 64bit write, Dword
5319 	 * aligned if only send 32bit data low (discard data high)
5320 	 */
5321 	if (write64bit)
5322 		BUG_ON(addr & 0x7);
5323 	else
5324 		BUG_ON(addr & 0x3);
5325 	amdgpu_ring_write(ring, lower_32_bits(addr));
5326 	amdgpu_ring_write(ring, upper_32_bits(addr));
5327 	amdgpu_ring_write(ring, lower_32_bits(seq));
5328 	amdgpu_ring_write(ring, upper_32_bits(seq));
5329 	amdgpu_ring_write(ring, 0);
5330 }
5331 
5332 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5333 {
5334 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5335 	uint32_t seq = ring->fence_drv.sync_seq;
5336 	uint64_t addr = ring->fence_drv.gpu_addr;
5337 
5338 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5339 			      lower_32_bits(addr), upper_32_bits(addr),
5340 			      seq, 0xffffffff, 4);
5341 }
5342 
5343 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5344 					unsigned vmid, uint64_t pd_addr)
5345 {
5346 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5347 
5348 	/* compute doesn't have PFP */
5349 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5350 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5351 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5352 		amdgpu_ring_write(ring, 0x0);
5353 	}
5354 }
5355 
/* Return the compute ring read pointer as stored at ring->rptr_cpu_addr. */
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
}
5360 
5361 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5362 {
5363 	u64 wptr;
5364 
5365 	/* XXX check if swapping is necessary on BE */
5366 	if (ring->use_doorbell)
5367 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5368 	else
5369 		BUG();
5370 	return wptr;
5371 }
5372 
5373 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5374 {
5375 	struct amdgpu_device *adev = ring->adev;
5376 
5377 	/* XXX check if swapping is necessary on BE */
5378 	if (ring->use_doorbell) {
5379 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5380 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5381 	} else{
5382 		BUG(); /* only DOORBELL method supported on gfx9 now */
5383 	}
5384 }
5385 
5386 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5387 					 u64 seq, unsigned int flags)
5388 {
5389 	struct amdgpu_device *adev = ring->adev;
5390 
5391 	/* we only allocate 32bit for each seq wb address */
5392 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5393 
5394 	/* write fence seq to the "addr" */
5395 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5396 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5397 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5398 	amdgpu_ring_write(ring, lower_32_bits(addr));
5399 	amdgpu_ring_write(ring, upper_32_bits(addr));
5400 	amdgpu_ring_write(ring, lower_32_bits(seq));
5401 
5402 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5403 		/* set register to trigger INT */
5404 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5405 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5406 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5407 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5408 		amdgpu_ring_write(ring, 0);
5409 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5410 	}
5411 }
5412 
/* Emit a SWITCH_BUFFER packet followed by a single zero dword. */
static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
5418 
5419 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5420 {
5421 	struct amdgpu_device *adev = ring->adev;
5422 	struct v9_ce_ib_state ce_payload = {0};
5423 	uint64_t offset, ce_payload_gpu_addr;
5424 	void *ce_payload_cpu_addr;
5425 	int cnt;
5426 
5427 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5428 
5429 	if (ring->is_mes_queue) {
5430 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5431 				  gfx[0].gfx_meta_data) +
5432 			offsetof(struct v9_gfx_meta_data, ce_payload);
5433 		ce_payload_gpu_addr =
5434 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5435 		ce_payload_cpu_addr =
5436 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5437 	} else {
5438 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5439 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5440 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5441 	}
5442 
5443 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5444 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5445 				 WRITE_DATA_DST_SEL(8) |
5446 				 WR_CONFIRM) |
5447 				 WRITE_DATA_CACHE_POLICY(0));
5448 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5449 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5450 
5451 	amdgpu_ring_ib_on_emit_ce(ring);
5452 
5453 	if (resume)
5454 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5455 					   sizeof(ce_payload) >> 2);
5456 	else
5457 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5458 					   sizeof(ce_payload) >> 2);
5459 }
5460 
/*
 * Preempt the IB currently running on @ring through the KIQ.
 *
 * Sequence: reserve space on the KIQ ring under kiq->ring_lock, clear the
 * preempt cond_exec value on @ring, emit a trailing fence on @ring, ask the
 * KIQ to preempt the queue without unmapping it, then poll the trailing
 * fence for up to adev->usec_timeout microseconds. Afterwards
 * CP_VMID_PREEMPT is reset and the cond_exec value is set again.
 *
 * Returns 0 on success, -EINVAL if the KIQ has no unmap_queues callback or
 * the trailing fence does not signal in time, -ENOMEM if the KIQ ring
 * allocation fails.
 */
static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	ring->trail_seq += 1;
	/*
	 * NOTE(review): the return value of this allocation is not checked,
	 * unlike the KIQ ring allocation above - confirm it cannot fail here.
	 */
	amdgpu_ring_alloc(ring, 13);
	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);

	/* assert IB preemption, emit the trailing fence */
	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
				   ring->trail_fence_gpu_addr,
				   ring->trail_seq);

	amdgpu_ring_commit(kiq_ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
			le32_to_cpu(*ring->trail_fence_cpu_addr))
			break;
		udelay(1);
	}

	/* the loop ran to completion without seeing the fence value */
	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
	}

	/* reset the CP_VMID_PREEMPT after trailing fence */
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
			      0x0);
	amdgpu_ring_commit(ring);

	/* deassert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
5518 
5519 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5520 {
5521 	struct amdgpu_device *adev = ring->adev;
5522 	struct v9_de_ib_state de_payload = {0};
5523 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5524 	void *de_payload_cpu_addr;
5525 	int cnt;
5526 
5527 	if (ring->is_mes_queue) {
5528 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5529 				  gfx[0].gfx_meta_data) +
5530 			offsetof(struct v9_gfx_meta_data, de_payload);
5531 		de_payload_gpu_addr =
5532 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5533 		de_payload_cpu_addr =
5534 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5535 
5536 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5537 				  gfx[0].gds_backup) +
5538 			offsetof(struct v9_gfx_meta_data, de_payload);
5539 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5540 	} else {
5541 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5542 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5543 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5544 
5545 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5546 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5547 				 PAGE_SIZE);
5548 	}
5549 
5550 	if (usegds) {
5551 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5552 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5553 	}
5554 
5555 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5556 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5557 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5558 				 WRITE_DATA_DST_SEL(8) |
5559 				 WR_CONFIRM) |
5560 				 WRITE_DATA_CACHE_POLICY(0));
5561 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5562 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5563 
5564 	amdgpu_ring_ib_on_emit_de(ring);
5565 	if (resume)
5566 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5567 					   sizeof(de_payload) >> 2);
5568 	else
5569 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5570 					   sizeof(de_payload) >> 2);
5571 }
5572 
5573 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5574 				   bool secure)
5575 {
5576 	uint32_t v = secure ? FRAME_TMZ : 0;
5577 
5578 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5579 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5580 }
5581 
5582 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5583 {
5584 	uint32_t dw2 = 0;
5585 
5586 	gfx_v9_0_ring_emit_ce_meta(ring,
5587 				   (!amdgpu_sriov_vf(ring->adev) &&
5588 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5589 
5590 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5591 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5592 		/* set load_global_config & load_global_uconfig */
5593 		dw2 |= 0x8001;
5594 		/* set load_cs_sh_regs */
5595 		dw2 |= 0x01000000;
5596 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5597 		dw2 |= 0x10002;
5598 
5599 		/* set load_ce_ram if preamble presented */
5600 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5601 			dw2 |= 0x10000000;
5602 	} else {
5603 		/* still load_ce_ram if this is the first time preamble presented
5604 		 * although there is no context switch happens.
5605 		 */
5606 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5607 			dw2 |= 0x10000000;
5608 	}
5609 
5610 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5611 	amdgpu_ring_write(ring, dw2);
5612 	amdgpu_ring_write(ring, 0);
5613 }
5614 
5615 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5616 {
5617 	unsigned ret;
5618 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5619 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5620 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5621 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5622 	ret = ring->wptr & ring->buf_mask;
5623 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5624 	return ret;
5625 }
5626 
5627 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5628 {
5629 	unsigned cur;
5630 	BUG_ON(offset > ring->buf_mask);
5631 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5632 
5633 	cur = (ring->wptr - 1) & ring->buf_mask;
5634 	if (likely(cur > offset))
5635 		ring->ring[offset] = cur - offset;
5636 	else
5637 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5638 }
5639 
5640 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5641 				    uint32_t reg_val_offs)
5642 {
5643 	struct amdgpu_device *adev = ring->adev;
5644 
5645 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5646 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5647 				(5 << 8) |	/* dst: memory */
5648 				(1 << 20));	/* write confirm */
5649 	amdgpu_ring_write(ring, reg);
5650 	amdgpu_ring_write(ring, 0);
5651 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5652 				reg_val_offs * 4));
5653 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5654 				reg_val_offs * 4));
5655 }
5656 
5657 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5658 				    uint32_t val)
5659 {
5660 	uint32_t cmd = 0;
5661 
5662 	switch (ring->funcs->type) {
5663 	case AMDGPU_RING_TYPE_GFX:
5664 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5665 		break;
5666 	case AMDGPU_RING_TYPE_KIQ:
5667 		cmd = (1 << 16); /* no inc addr */
5668 		break;
5669 	default:
5670 		cmd = WR_CONFIRM;
5671 		break;
5672 	}
5673 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5674 	amdgpu_ring_write(ring, cmd);
5675 	amdgpu_ring_write(ring, reg);
5676 	amdgpu_ring_write(ring, 0);
5677 	amdgpu_ring_write(ring, val);
5678 }
5679 
/*
 * Emit a wait on register @reg through gfx_v9_0_wait_reg_mem(), passing
 * @val as the reference value and @mask as the compare mask.
 */
static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
5685 
5686 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5687 						  uint32_t reg0, uint32_t reg1,
5688 						  uint32_t ref, uint32_t mask)
5689 {
5690 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5691 	struct amdgpu_device *adev = ring->adev;
5692 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5693 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5694 
5695 	if (fw_version_ok)
5696 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5697 				      ref, mask, 0x20);
5698 	else
5699 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5700 							   ref, mask);
5701 }
5702 
5703 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5704 {
5705 	struct amdgpu_device *adev = ring->adev;
5706 	uint32_t value = 0;
5707 
5708 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5709 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5710 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5711 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5712 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5713 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5714 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5715 }
5716 
5717 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5718 						 enum amdgpu_interrupt_state state)
5719 {
5720 	switch (state) {
5721 	case AMDGPU_IRQ_STATE_DISABLE:
5722 	case AMDGPU_IRQ_STATE_ENABLE:
5723 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5724 			       TIME_STAMP_INT_ENABLE,
5725 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5726 		break;
5727 	default:
5728 		break;
5729 	}
5730 }
5731 
5732 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5733 						     int me, int pipe,
5734 						     enum amdgpu_interrupt_state state)
5735 {
5736 	u32 mec_int_cntl, mec_int_cntl_reg;
5737 
5738 	/*
5739 	 * amdgpu controls only the first MEC. That's why this function only
5740 	 * handles the setting of interrupts for this specific MEC. All other
5741 	 * pipes' interrupts are set by amdkfd.
5742 	 */
5743 
5744 	if (me == 1) {
5745 		switch (pipe) {
5746 		case 0:
5747 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5748 			break;
5749 		case 1:
5750 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5751 			break;
5752 		case 2:
5753 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5754 			break;
5755 		case 3:
5756 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5757 			break;
5758 		default:
5759 			DRM_DEBUG("invalid pipe %d\n", pipe);
5760 			return;
5761 		}
5762 	} else {
5763 		DRM_DEBUG("invalid me %d\n", me);
5764 		return;
5765 	}
5766 
5767 	switch (state) {
5768 	case AMDGPU_IRQ_STATE_DISABLE:
5769 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
5770 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5771 					     TIME_STAMP_INT_ENABLE, 0);
5772 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5773 		break;
5774 	case AMDGPU_IRQ_STATE_ENABLE:
5775 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5776 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5777 					     TIME_STAMP_INT_ENABLE, 1);
5778 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5779 		break;
5780 	default:
5781 		break;
5782 	}
5783 }
5784 
5785 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5786 					     struct amdgpu_irq_src *source,
5787 					     unsigned type,
5788 					     enum amdgpu_interrupt_state state)
5789 {
5790 	switch (state) {
5791 	case AMDGPU_IRQ_STATE_DISABLE:
5792 	case AMDGPU_IRQ_STATE_ENABLE:
5793 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5794 			       PRIV_REG_INT_ENABLE,
5795 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5796 		break;
5797 	default:
5798 		break;
5799 	}
5800 
5801 	return 0;
5802 }
5803 
5804 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5805 					      struct amdgpu_irq_src *source,
5806 					      unsigned type,
5807 					      enum amdgpu_interrupt_state state)
5808 {
5809 	switch (state) {
5810 	case AMDGPU_IRQ_STATE_DISABLE:
5811 	case AMDGPU_IRQ_STATE_ENABLE:
5812 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5813 			       PRIV_INSTR_INT_ENABLE,
5814 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5815 		break;
5816 	default:
5817 		break;
5818 	}
5819 
5820 	return 0;
5821 }
5822 
/*
 * Write @en into the CP_ECC_ERROR_INT_ENABLE field of the
 * CP_ME<me>_PIPE<pipe>_INT_CNTL register (GC instance 0).  @me and @pipe
 * are pasted into the register name, so they must be literal digits.
 * Expands to WREG32_FIELD15() and therefore can only be used where that
 * macro can be used.
 */
#define SET_ECC_ON_ME_PIPE(me, pipe, en)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,	\
			CP_ECC_ERROR_INT_ENABLE, en)

/* Turn the CP ECC error interrupt on / off for one ME pipe. */
#define ENABLE_ECC_ON_ME_PIPE(me, pipe)		SET_ECC_ON_ME_PIPE(me, pipe, 1)
#define DISABLE_ECC_ON_ME_PIPE(me, pipe)	SET_ECC_ON_ME_PIPE(me, pipe, 0)
5830 
5831 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5832 					      struct amdgpu_irq_src *source,
5833 					      unsigned type,
5834 					      enum amdgpu_interrupt_state state)
5835 {
5836 	switch (state) {
5837 	case AMDGPU_IRQ_STATE_DISABLE:
5838 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5839 				CP_ECC_ERROR_INT_ENABLE, 0);
5840 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5841 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5842 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5843 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5844 		break;
5845 
5846 	case AMDGPU_IRQ_STATE_ENABLE:
5847 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5848 				CP_ECC_ERROR_INT_ENABLE, 1);
5849 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5850 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5851 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5852 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5853 		break;
5854 	default:
5855 		break;
5856 	}
5857 
5858 	return 0;
5859 }
5860 
5861 
5862 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5863 					    struct amdgpu_irq_src *src,
5864 					    unsigned type,
5865 					    enum amdgpu_interrupt_state state)
5866 {
5867 	switch (type) {
5868 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5869 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5870 		break;
5871 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5872 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5873 		break;
5874 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5875 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5876 		break;
5877 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5878 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5879 		break;
5880 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5881 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5882 		break;
5883 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5884 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5885 		break;
5886 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5887 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5888 		break;
5889 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5890 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5891 		break;
5892 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5893 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5894 		break;
5895 	default:
5896 		break;
5897 	}
5898 	return 0;
5899 }
5900 
5901 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5902 			    struct amdgpu_irq_src *source,
5903 			    struct amdgpu_iv_entry *entry)
5904 {
5905 	int i;
5906 	u8 me_id, pipe_id, queue_id;
5907 	struct amdgpu_ring *ring;
5908 
5909 	DRM_DEBUG("IH: CP EOP\n");
5910 	me_id = (entry->ring_id & 0x0c) >> 2;
5911 	pipe_id = (entry->ring_id & 0x03) >> 0;
5912 	queue_id = (entry->ring_id & 0x70) >> 4;
5913 
5914 	switch (me_id) {
5915 	case 0:
5916 		if (adev->gfx.num_gfx_rings &&
5917 		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5918 			/* Fence signals are handled on the software rings*/
5919 			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5920 				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5921 		}
5922 		break;
5923 	case 1:
5924 	case 2:
5925 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5926 			ring = &adev->gfx.compute_ring[i];
5927 			/* Per-queue interrupt is supported for MEC starting from VI.
5928 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5929 			  */
5930 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5931 				amdgpu_fence_process(ring);
5932 		}
5933 		break;
5934 	}
5935 	return 0;
5936 }
5937 
5938 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5939 			   struct amdgpu_iv_entry *entry)
5940 {
5941 	u8 me_id, pipe_id, queue_id;
5942 	struct amdgpu_ring *ring;
5943 	int i;
5944 
5945 	me_id = (entry->ring_id & 0x0c) >> 2;
5946 	pipe_id = (entry->ring_id & 0x03) >> 0;
5947 	queue_id = (entry->ring_id & 0x70) >> 4;
5948 
5949 	switch (me_id) {
5950 	case 0:
5951 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5952 		break;
5953 	case 1:
5954 	case 2:
5955 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5956 			ring = &adev->gfx.compute_ring[i];
5957 			if (ring->me == me_id && ring->pipe == pipe_id &&
5958 			    ring->queue == queue_id)
5959 				drm_sched_fault(&ring->sched);
5960 		}
5961 		break;
5962 	}
5963 }
5964 
/*
 * Interrupt handler for an illegal register access in the command stream.
 *
 * Logs the error and passes @entry to gfx_v9_0_fault(), which notifies the
 * scheduler of the ring identified by entry->ring_id.  @source is not
 * used.  Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
5973 
/*
 * Interrupt handler for an illegal instruction in the command stream.
 *
 * Logs the error and passes @entry to gfx_v9_0_fault(), which notifies the
 * scheduler of the ring identified by entry->ring_id.  @source is not
 * used.  Always returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
5982 
5983 
5984 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5985 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5986 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5987 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5988 	},
5989 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5990 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5991 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5992 	},
5993 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5994 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5995 	  0, 0
5996 	},
5997 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5998 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5999 	  0, 0
6000 	},
6001 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6002 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6003 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6004 	},
6005 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6006 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6007 	  0, 0
6008 	},
6009 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6010 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6011 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6012 	},
6013 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6014 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6015 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6016 	},
6017 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6018 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6019 	  0, 0
6020 	},
6021 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6022 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6023 	  0, 0
6024 	},
6025 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6026 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6027 	  0, 0
6028 	},
6029 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6030 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6031 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6032 	},
6033 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6034 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6035 	  0, 0
6036 	},
6037 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6038 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6039 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6040 	},
6041 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6042 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6043 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6044 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6045 	},
6046 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6047 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6048 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6049 	  0, 0
6050 	},
6051 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6052 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6053 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6054 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6055 	},
6056 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6057 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6058 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6059 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6060 	},
6061 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6062 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6063 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6064 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6065 	},
6066 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6067 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6068 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6069 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6070 	},
6071 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6072 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6073 	  0, 0
6074 	},
6075 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6076 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6077 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6078 	},
6079 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6080 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6081 	  0, 0
6082 	},
6083 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6084 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6085 	  0, 0
6086 	},
6087 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6088 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6089 	  0, 0
6090 	},
6091 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6092 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6093 	  0, 0
6094 	},
6095 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6096 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6097 	  0, 0
6098 	},
6099 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6100 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6101 	  0, 0
6102 	},
6103 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6104 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6105 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6106 	},
6107 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6108 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6109 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6110 	},
6111 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6112 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6113 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6114 	},
6115 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6116 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6117 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6118 	},
6119 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6120 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6121 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6122 	},
6123 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6124 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6125 	  0, 0
6126 	},
6127 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6128 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6129 	  0, 0
6130 	},
6131 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6132 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6133 	  0, 0
6134 	},
6135 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6136 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6137 	  0, 0
6138 	},
6139 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6140 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6141 	  0, 0
6142 	},
6143 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6144 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6145 	  0, 0
6146 	},
6147 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6148 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6149 	  0, 0
6150 	},
6151 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6152 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6153 	  0, 0
6154 	},
6155 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6156 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6157 	  0, 0
6158 	},
6159 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6160 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6161 	  0, 0
6162 	},
6163 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6164 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6165 	  0, 0
6166 	},
6167 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6168 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6169 	  0, 0
6170 	},
6171 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6172 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6173 	  0, 0
6174 	},
6175 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6176 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6177 	  0, 0
6178 	},
6179 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6180 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6181 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6182 	},
6183 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6184 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6185 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6186 	},
6187 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6188 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6189 	  0, 0
6190 	},
6191 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6192 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6193 	  0, 0
6194 	},
6195 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6196 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6197 	  0, 0
6198 	},
6199 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6200 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6201 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6202 	},
6203 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6204 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6205 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6206 	},
6207 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6208 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6209 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6210 	},
6211 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6212 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6213 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6214 	},
6215 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6216 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6217 	  0, 0
6218 	},
6219 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6220 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6221 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6222 	},
6223 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6224 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6225 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6226 	},
6227 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6228 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6229 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6230 	},
6231 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6232 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6233 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6234 	},
6235 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6236 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6237 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6238 	},
6239 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6240 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6241 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6242 	},
6243 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6244 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6245 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6246 	},
6247 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6248 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6249 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6250 	},
6251 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6252 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6253 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6254 	},
6255 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6256 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6257 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6258 	},
6259 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6260 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6261 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6262 	},
6263 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6264 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6265 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6266 	},
6267 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6268 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6269 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6270 	},
6271 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6272 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6273 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6274 	},
6275 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6276 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6277 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6278 	},
6279 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6280 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6281 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6282 	},
6283 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6284 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6285 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6286 	},
6287 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6288 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6289 	  0, 0
6290 	},
6291 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6292 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6293 	  0, 0
6294 	},
6295 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6296 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6297 	  0, 0
6298 	},
6299 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6300 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6301 	  0, 0
6302 	},
6303 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6304 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6305 	  0, 0
6306 	},
6307 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6308 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6309 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6310 	},
6311 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6312 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6313 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6314 	},
6315 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6316 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6317 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6318 	},
6319 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6320 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6321 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6322 	},
6323 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6324 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6325 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6326 	},
6327 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6328 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6329 	  0, 0
6330 	},
6331 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6332 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6333 	  0, 0
6334 	},
6335 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6336 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6337 	  0, 0
6338 	},
6339 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6340 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6341 	  0, 0
6342 	},
6343 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6344 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6345 	  0, 0
6346 	},
6347 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6348 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6349 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6350 	},
6351 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6352 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6353 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6354 	},
6355 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6356 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6357 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6358 	},
6359 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6360 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6361 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6362 	},
6363 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6364 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6365 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6366 	},
6367 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6368 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6369 	  0, 0
6370 	},
6371 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6372 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6373 	  0, 0
6374 	},
6375 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6376 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6377 	  0, 0
6378 	},
6379 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6380 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6381 	  0, 0
6382 	},
6383 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6384 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6385 	  0, 0
6386 	},
6387 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6388 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6389 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6390 	},
6391 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6392 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6393 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6394 	},
6395 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6396 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6397 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6398 	},
6399 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6400 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6401 	  0, 0
6402 	},
6403 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6404 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6405 	  0, 0
6406 	},
6407 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6408 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6409 	  0, 0
6410 	},
6411 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6412 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6413 	  0, 0
6414 	},
6415 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6416 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6417 	  0, 0
6418 	},
6419 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6420 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6421 	  0, 0
6422 	}
6423 };
6424 
6425 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6426 				     void *inject_if, uint32_t instance_mask)
6427 {
6428 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6429 	int ret;
6430 	struct ta_ras_trigger_error_input block_info = { 0 };
6431 
6432 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6433 		return -EINVAL;
6434 
6435 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6436 		return -EINVAL;
6437 
6438 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6439 		return -EPERM;
6440 
6441 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6442 	      info->head.type)) {
6443 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6444 			ras_gfx_subblocks[info->head.sub_block_index].name,
6445 			info->head.type);
6446 		return -EPERM;
6447 	}
6448 
6449 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6450 	      info->head.type)) {
6451 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6452 			ras_gfx_subblocks[info->head.sub_block_index].name,
6453 			info->head.type);
6454 		return -EPERM;
6455 	}
6456 
6457 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6458 	block_info.sub_block_index =
6459 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6460 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6461 	block_info.address = info->address;
6462 	block_info.value = info->value;
6463 
6464 	mutex_lock(&adev->grbm_idx_mutex);
6465 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6466 	mutex_unlock(&adev->grbm_idx_mutex);
6467 
6468 	return ret;
6469 }
6470 
/*
 * Names of the VM L2 bank-cache memories.  The array index is the value
 * written to VM_L2_MEM_ECC_INDEX when the matching ECC counter is read,
 * and the name is only used for log output.  Both the strings and the
 * pointer table are read-only.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6489 
/*
 * Names of the VM L2 walker memories.  The array index is the value
 * written to VM_L2_WALKER_MEM_ECC_INDEX when the matching ECC counter is
 * read.  Both the strings and the pointer table are read-only.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6499 
/*
 * Names of the ATC L2 2M-cache memories (bank x way).
 * NOTE(review): presumably indexed by the value written to
 * ATC_L2_CACHE_2M_EDC_INDEX - the consumer is outside this excerpt.
 * Both the strings and the pointer table are read-only.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6506 
/*
 * Names of the ATC L2 4K-cache memories (bank x way x mem 0..7).
 * NOTE(review): presumably indexed by the value written to
 * ATC_L2_CACHE_4K_EDC_INDEX - the consumer is outside this excerpt.
 * Both the strings and the pointer table are read-only.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6541 
6542 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6543 					 struct ras_err_data *err_data)
6544 {
6545 	uint32_t i, data;
6546 	uint32_t sec_count, ded_count;
6547 
6548 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6549 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6550 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6551 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6552 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6553 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6554 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6555 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6556 
6557 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6558 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6559 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6560 
6561 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6562 		if (sec_count) {
6563 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6564 				"SEC %d\n", i, vml2_mems[i], sec_count);
6565 			err_data->ce_count += sec_count;
6566 		}
6567 
6568 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6569 		if (ded_count) {
6570 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6571 				"DED %d\n", i, vml2_mems[i], ded_count);
6572 			err_data->ue_count += ded_count;
6573 		}
6574 	}
6575 
6576 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6577 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6578 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6579 
6580 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6581 						SEC_COUNT);
6582 		if (sec_count) {
6583 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6584 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6585 			err_data->ce_count += sec_count;
6586 		}
6587 
6588 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6589 						DED_COUNT);
6590 		if (ded_count) {
6591 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6592 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6593 			err_data->ue_count += ded_count;
6594 		}
6595 	}
6596 
6597 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6598 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6599 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6600 
6601 		sec_count = (data & 0x00006000L) >> 0xd;
6602 		if (sec_count) {
6603 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6604 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6605 				sec_count);
6606 			err_data->ce_count += sec_count;
6607 		}
6608 	}
6609 
6610 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6611 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6612 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6613 
6614 		sec_count = (data & 0x00006000L) >> 0xd;
6615 		if (sec_count) {
6616 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6618 				sec_count);
6619 			err_data->ce_count += sec_count;
6620 		}
6621 
6622 		ded_count = (data & 0x00018000L) >> 0xf;
6623 		if (ded_count) {
6624 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6625 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6626 				ded_count);
6627 			err_data->ue_count += ded_count;
6628 		}
6629 	}
6630 
6631 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6632 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6633 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6634 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6635 
6636 	return 0;
6637 }
6638 
6639 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6640 	const struct soc15_reg_entry *reg,
6641 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6642 	uint32_t *sec_count, uint32_t *ded_count)
6643 {
6644 	uint32_t i;
6645 	uint32_t sec_cnt, ded_cnt;
6646 
6647 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6648 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6649 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6650 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6651 			continue;
6652 
6653 		sec_cnt = (value &
6654 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6655 				gfx_v9_0_ras_fields[i].sec_count_shift;
6656 		if (sec_cnt) {
6657 			dev_info(adev->dev, "GFX SubBlock %s, "
6658 				"Instance[%d][%d], SEC %d\n",
6659 				gfx_v9_0_ras_fields[i].name,
6660 				se_id, inst_id,
6661 				sec_cnt);
6662 			*sec_count += sec_cnt;
6663 		}
6664 
6665 		ded_cnt = (value &
6666 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6667 				gfx_v9_0_ras_fields[i].ded_count_shift;
6668 		if (ded_cnt) {
6669 			dev_info(adev->dev, "GFX SubBlock %s, "
6670 				"Instance[%d][%d], DED %d\n",
6671 				gfx_v9_0_ras_fields[i].name,
6672 				se_id, inst_id,
6673 				ded_cnt);
6674 			*ded_count += ded_cnt;
6675 		}
6676 	}
6677 
6678 	return 0;
6679 }
6680 
6681 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6682 {
6683 	int i, j, k;
6684 
6685 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6686 		return;
6687 
6688 	/* read back registers to clear the counters */
6689 	mutex_lock(&adev->grbm_idx_mutex);
6690 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6691 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6692 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6693 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6694 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6695 			}
6696 		}
6697 	}
6698 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6699 	mutex_unlock(&adev->grbm_idx_mutex);
6700 
6701 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6702 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6703 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6704 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6705 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6706 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6707 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6708 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6709 
6710 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6711 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6712 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6713 	}
6714 
6715 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6716 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6717 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6718 	}
6719 
6720 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6721 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6722 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6723 	}
6724 
6725 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6726 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6727 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6728 	}
6729 
6730 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6731 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6732 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6733 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6734 }
6735 
6736 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6737 					  void *ras_error_status)
6738 {
6739 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6740 	uint32_t sec_count = 0, ded_count = 0;
6741 	uint32_t i, j, k;
6742 	uint32_t reg_value;
6743 
6744 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6745 		return;
6746 
6747 	err_data->ue_count = 0;
6748 	err_data->ce_count = 0;
6749 
6750 	mutex_lock(&adev->grbm_idx_mutex);
6751 
6752 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6753 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6754 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6755 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6756 				reg_value =
6757 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6758 				if (reg_value)
6759 					gfx_v9_0_ras_error_count(adev,
6760 						&gfx_v9_0_edc_counter_regs[i],
6761 						j, k, reg_value,
6762 						&sec_count, &ded_count);
6763 			}
6764 		}
6765 	}
6766 
6767 	err_data->ce_count += sec_count;
6768 	err_data->ue_count += ded_count;
6769 
6770 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6771 	mutex_unlock(&adev->grbm_idx_mutex);
6772 
6773 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6774 }
6775 
6776 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6777 {
6778 	const unsigned int cp_coher_cntl =
6779 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6780 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6781 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6782 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6783 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6784 
6785 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6786 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6787 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6788 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6789 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6790 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6791 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6792 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6793 }
6794 
6795 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6796 					uint32_t pipe, bool enable)
6797 {
6798 	struct amdgpu_device *adev = ring->adev;
6799 	uint32_t val;
6800 	uint32_t wcl_cs_reg;
6801 
6802 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6803 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6804 
6805 	switch (pipe) {
6806 	case 0:
6807 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6808 		break;
6809 	case 1:
6810 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6811 		break;
6812 	case 2:
6813 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6814 		break;
6815 	case 3:
6816 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6817 		break;
6818 	default:
6819 		DRM_DEBUG("invalid pipe %d\n", pipe);
6820 		return;
6821 	}
6822 
6823 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6824 
6825 }
6826 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6827 {
6828 	struct amdgpu_device *adev = ring->adev;
6829 	uint32_t val;
6830 	int i;
6831 
6832 
6833 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6834 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6835 	 * around 25% of gpu resources.
6836 	 */
6837 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6838 	amdgpu_ring_emit_wreg(ring,
6839 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6840 			      val);
6841 
6842 	/* Restrict waves for normal/low priority compute queues as well
6843 	 * to get best QoS for high priority compute jobs.
6844 	 *
6845 	 * amdgpu controls only 1st ME(0-3 CS pipes).
6846 	 */
6847 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6848 		if (i != ring->pipe)
6849 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6850 
6851 	}
6852 }
6853 
6854 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6855 	.name = "gfx_v9_0",
6856 	.early_init = gfx_v9_0_early_init,
6857 	.late_init = gfx_v9_0_late_init,
6858 	.sw_init = gfx_v9_0_sw_init,
6859 	.sw_fini = gfx_v9_0_sw_fini,
6860 	.hw_init = gfx_v9_0_hw_init,
6861 	.hw_fini = gfx_v9_0_hw_fini,
6862 	.suspend = gfx_v9_0_suspend,
6863 	.resume = gfx_v9_0_resume,
6864 	.is_idle = gfx_v9_0_is_idle,
6865 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6866 	.soft_reset = gfx_v9_0_soft_reset,
6867 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6868 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6869 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6870 };
6871 
6872 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6873 	.type = AMDGPU_RING_TYPE_GFX,
6874 	.align_mask = 0xff,
6875 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6876 	.support_64bit_ptrs = true,
6877 	.secure_submission_supported = true,
6878 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6879 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6880 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6881 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6882 		5 +  /* COND_EXEC */
6883 		7 +  /* PIPELINE_SYNC */
6884 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6885 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6886 		2 + /* VM_FLUSH */
6887 		8 +  /* FENCE for VM_FLUSH */
6888 		20 + /* GDS switch */
6889 		4 + /* double SWITCH_BUFFER,
6890 		       the first COND_EXEC jump to the place just
6891 			   prior to this double SWITCH_BUFFER  */
6892 		5 + /* COND_EXEC */
6893 		7 +	 /*	HDP_flush */
6894 		4 +	 /*	VGT_flush */
6895 		14 + /*	CE_META */
6896 		31 + /*	DE_META */
6897 		3 + /* CNTX_CTRL */
6898 		5 + /* HDP_INVL */
6899 		8 + 8 + /* FENCE x2 */
6900 		2 + /* SWITCH_BUFFER */
6901 		7, /* gfx_v9_0_emit_mem_sync */
6902 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6903 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6904 	.emit_fence = gfx_v9_0_ring_emit_fence,
6905 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6906 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6907 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6908 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6909 	.test_ring = gfx_v9_0_ring_test_ring,
6910 	.insert_nop = amdgpu_ring_insert_nop,
6911 	.pad_ib = amdgpu_ring_generic_pad_ib,
6912 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6913 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6914 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6915 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6916 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
6917 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6918 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6919 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6920 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6921 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6922 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6923 };
6924 
6925 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6926 	.type = AMDGPU_RING_TYPE_GFX,
6927 	.align_mask = 0xff,
6928 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6929 	.support_64bit_ptrs = true,
6930 	.secure_submission_supported = true,
6931 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6932 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6933 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6934 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6935 		5 +  /* COND_EXEC */
6936 		7 +  /* PIPELINE_SYNC */
6937 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6938 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6939 		2 + /* VM_FLUSH */
6940 		8 +  /* FENCE for VM_FLUSH */
6941 		20 + /* GDS switch */
6942 		4 + /* double SWITCH_BUFFER,
6943 		     * the first COND_EXEC jump to the place just
6944 		     * prior to this double SWITCH_BUFFER
6945 		     */
6946 		5 + /* COND_EXEC */
6947 		7 +	 /*	HDP_flush */
6948 		4 +	 /*	VGT_flush */
6949 		14 + /*	CE_META */
6950 		31 + /*	DE_META */
6951 		3 + /* CNTX_CTRL */
6952 		5 + /* HDP_INVL */
6953 		8 + 8 + /* FENCE x2 */
6954 		2 + /* SWITCH_BUFFER */
6955 		7, /* gfx_v9_0_emit_mem_sync */
6956 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6957 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6958 	.emit_fence = gfx_v9_0_ring_emit_fence,
6959 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6960 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6961 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6962 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6963 	.test_ring = gfx_v9_0_ring_test_ring,
6964 	.test_ib = gfx_v9_0_ring_test_ib,
6965 	.insert_nop = amdgpu_sw_ring_insert_nop,
6966 	.pad_ib = amdgpu_ring_generic_pad_ib,
6967 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6968 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6969 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6970 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6971 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6972 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6973 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6974 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6975 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6976 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6977 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
6978 	.patch_de = gfx_v9_0_ring_patch_de_meta,
6979 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
6980 };
6981 
6982 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6983 	.type = AMDGPU_RING_TYPE_COMPUTE,
6984 	.align_mask = 0xff,
6985 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6986 	.support_64bit_ptrs = true,
6987 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6988 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6989 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6990 	.emit_frame_size =
6991 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6992 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6993 		5 + /* hdp invalidate */
6994 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6995 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6996 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6997 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6998 		7 + /* gfx_v9_0_emit_mem_sync */
6999 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7000 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7001 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7002 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7003 	.emit_fence = gfx_v9_0_ring_emit_fence,
7004 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7005 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7006 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7007 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7008 	.test_ring = gfx_v9_0_ring_test_ring,
7009 	.test_ib = gfx_v9_0_ring_test_ib,
7010 	.insert_nop = amdgpu_ring_insert_nop,
7011 	.pad_ib = amdgpu_ring_generic_pad_ib,
7012 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7013 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7014 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7015 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7016 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7017 };
7018 
7019 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7020 	.type = AMDGPU_RING_TYPE_KIQ,
7021 	.align_mask = 0xff,
7022 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7023 	.support_64bit_ptrs = true,
7024 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7025 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7026 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7027 	.emit_frame_size =
7028 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7029 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7030 		5 + /* hdp invalidate */
7031 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7032 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7033 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7034 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7035 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7036 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7037 	.test_ring = gfx_v9_0_ring_test_ring,
7038 	.insert_nop = amdgpu_ring_insert_nop,
7039 	.pad_ib = amdgpu_ring_generic_pad_ib,
7040 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7041 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7042 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7043 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7044 };
7045 
7046 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7047 {
7048 	int i;
7049 
7050 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7051 
7052 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7053 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7054 
7055 	if (adev->gfx.num_gfx_rings) {
7056 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7057 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7058 	}
7059 
7060 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7061 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7062 }
7063 
7064 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7065 	.set = gfx_v9_0_set_eop_interrupt_state,
7066 	.process = gfx_v9_0_eop_irq,
7067 };
7068 
7069 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7070 	.set = gfx_v9_0_set_priv_reg_fault_state,
7071 	.process = gfx_v9_0_priv_reg_irq,
7072 };
7073 
7074 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7075 	.set = gfx_v9_0_set_priv_inst_fault_state,
7076 	.process = gfx_v9_0_priv_inst_irq,
7077 };
7078 
7079 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7080 	.set = gfx_v9_0_set_cp_ecc_error_state,
7081 	.process = amdgpu_gfx_cp_ecc_error_irq,
7082 };
7083 
7084 
7085 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7086 {
7087 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7088 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7089 
7090 	adev->gfx.priv_reg_irq.num_types = 1;
7091 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7092 
7093 	adev->gfx.priv_inst_irq.num_types = 1;
7094 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7095 
7096 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7097 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7098 }
7099 
7100 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7101 {
7102 	switch (adev->ip_versions[GC_HWIP][0]) {
7103 	case IP_VERSION(9, 0, 1):
7104 	case IP_VERSION(9, 2, 1):
7105 	case IP_VERSION(9, 4, 0):
7106 	case IP_VERSION(9, 2, 2):
7107 	case IP_VERSION(9, 1, 0):
7108 	case IP_VERSION(9, 4, 1):
7109 	case IP_VERSION(9, 3, 0):
7110 	case IP_VERSION(9, 4, 2):
7111 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7112 		break;
7113 	default:
7114 		break;
7115 	}
7116 }
7117 
7118 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7119 {
7120 	/* init asci gds info */
7121 	switch (adev->ip_versions[GC_HWIP][0]) {
7122 	case IP_VERSION(9, 0, 1):
7123 	case IP_VERSION(9, 2, 1):
7124 	case IP_VERSION(9, 4, 0):
7125 		adev->gds.gds_size = 0x10000;
7126 		break;
7127 	case IP_VERSION(9, 2, 2):
7128 	case IP_VERSION(9, 1, 0):
7129 	case IP_VERSION(9, 4, 1):
7130 		adev->gds.gds_size = 0x1000;
7131 		break;
7132 	case IP_VERSION(9, 4, 2):
7133 		/* aldebaran removed all the GDS internal memory,
7134 		 * only support GWS opcode in kernel, like barrier
7135 		 * semaphore.etc */
7136 		adev->gds.gds_size = 0;
7137 		break;
7138 	default:
7139 		adev->gds.gds_size = 0x10000;
7140 		break;
7141 	}
7142 
7143 	switch (adev->ip_versions[GC_HWIP][0]) {
7144 	case IP_VERSION(9, 0, 1):
7145 	case IP_VERSION(9, 4, 0):
7146 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7147 		break;
7148 	case IP_VERSION(9, 2, 1):
7149 		adev->gds.gds_compute_max_wave_id = 0x27f;
7150 		break;
7151 	case IP_VERSION(9, 2, 2):
7152 	case IP_VERSION(9, 1, 0):
7153 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7154 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7155 		else
7156 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7157 		break;
7158 	case IP_VERSION(9, 4, 1):
7159 		adev->gds.gds_compute_max_wave_id = 0xfff;
7160 		break;
7161 	case IP_VERSION(9, 4, 2):
7162 		/* deprecated for Aldebaran, no usage at all */
7163 		adev->gds.gds_compute_max_wave_id = 0;
7164 		break;
7165 	default:
7166 		/* this really depends on the chip */
7167 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7168 		break;
7169 	}
7170 
7171 	adev->gds.gws_size = 64;
7172 	adev->gds.oa_size = 16;
7173 }
7174 
7175 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7176 						 u32 bitmap)
7177 {
7178 	u32 data;
7179 
7180 	if (!bitmap)
7181 		return;
7182 
7183 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7184 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7185 
7186 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7187 }
7188 
7189 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7190 {
7191 	u32 data, mask;
7192 
7193 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7194 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7195 
7196 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7197 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7198 
7199 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7200 
7201 	return (~data) & mask;
7202 }
7203 
7204 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7205 				 struct amdgpu_cu_info *cu_info)
7206 {
7207 	int i, j, k, counter, active_cu_number = 0;
7208 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7209 	unsigned disable_masks[4 * 4];
7210 
7211 	if (!adev || !cu_info)
7212 		return -EINVAL;
7213 
7214 	/*
7215 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7216 	 */
7217 	if (adev->gfx.config.max_shader_engines *
7218 		adev->gfx.config.max_sh_per_se > 16)
7219 		return -EINVAL;
7220 
7221 	amdgpu_gfx_parse_disable_cu(disable_masks,
7222 				    adev->gfx.config.max_shader_engines,
7223 				    adev->gfx.config.max_sh_per_se);
7224 
7225 	mutex_lock(&adev->grbm_idx_mutex);
7226 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7227 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7228 			mask = 1;
7229 			ao_bitmap = 0;
7230 			counter = 0;
7231 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7232 			gfx_v9_0_set_user_cu_inactive_bitmap(
7233 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7234 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7235 
7236 			/*
7237 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7238 			 * 4x4 size array, and it's usually suitable for Vega
7239 			 * ASICs which has 4*2 SE/SH layout.
7240 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7241 			 * To mostly reduce the impact, we make it compatible
7242 			 * with current bitmap array as below:
7243 			 *    SE4,SH0 --> bitmap[0][1]
7244 			 *    SE5,SH0 --> bitmap[1][1]
7245 			 *    SE6,SH0 --> bitmap[2][1]
7246 			 *    SE7,SH0 --> bitmap[3][1]
7247 			 */
7248 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7249 
7250 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7251 				if (bitmap & mask) {
7252 					if (counter < adev->gfx.config.max_cu_per_sh)
7253 						ao_bitmap |= mask;
7254 					counter ++;
7255 				}
7256 				mask <<= 1;
7257 			}
7258 			active_cu_number += counter;
7259 			if (i < 2 && j < 2)
7260 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7261 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7262 		}
7263 	}
7264 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7265 	mutex_unlock(&adev->grbm_idx_mutex);
7266 
7267 	cu_info->number = active_cu_number;
7268 	cu_info->ao_cu_mask = ao_cu_mask;
7269 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7270 
7271 	return 0;
7272 }
7273 
7274 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7275 {
7276 	.type = AMD_IP_BLOCK_TYPE_GFX,
7277 	.major = 9,
7278 	.minor = 0,
7279 	.rev = 0,
7280 	.funcs = &gfx_v9_0_ip_funcs,
7281 };
7282