xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision d6e0cbb1)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15.h"
44 #include "soc15_common.h"
45 #include "clearstate_gfx9.h"
46 #include "v9_structs.h"
47 
48 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
49 
50 #include "amdgpu_ras.h"
51 
52 #define GFX9_NUM_GFX_RINGS     1
53 #define GFX9_MEC_HPD_SIZE 4096
54 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
55 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
56 
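/*
 * Register offsets defined locally, presumably because they are not
 * provided by the gc_9_0 register headers included above (the PWR_MISC
 * register sits outside the GC block).
 */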
57 #define mmPWR_MISC_CNTL_STATUS					0x0183
58 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
60 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
61 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
62 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
63 
64 #define mmGCEA_PROBE_MAP                        0x070c
65 #define mmGCEA_PROBE_MAP_BASE_IDX               0
66 
67 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
73 
74 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
80 
81 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
87 
88 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
89 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
90 MODULE_FIRMWARE("amdgpu/raven_me.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
93 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
94 
95 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
102 
103 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
109 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
110 
111 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 
122 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
123 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
124 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
125 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
126 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
127 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
128 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
129 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
130 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
131 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
133 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
134 
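/*
 * GFX sub-block enumeration as understood by the RAS TA (trusted
 * application) on the PSP.  The ras_gfx_subblocks[] table below maps each
 * AMDGPU_RAS_BLOCK__GFX_* index to one of these values so that error
 * injection requests can be forwarded to the TA.
 */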
135 enum ta_ras_gfx_subblock {
136 	/*CPC*/
137 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
138 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
139 	TA_RAS_BLOCK__GFX_CPC_UCODE,
140 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
141 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
142 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
143 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
144 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
145 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
147 	/* CPF*/
148 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
150 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
151 	TA_RAS_BLOCK__GFX_CPF_TAG,
152 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
153 	/* CPG*/
154 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
156 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_TAG,
158 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
159 	/* GDS*/
160 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
162 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
167 	/* SPI*/
168 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
169 	/* SQ*/
170 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
173 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
174 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
175 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
176 	/* SQC (3 ranges)*/
177 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
178 	/* SQC range 0*/
179 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
180 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
181 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
187 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
188 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
189 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
190 	/* SQC range 1*/
191 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
192 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
193 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
196 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
202 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
203 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
204 	/* SQC range 2*/
205 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
206 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
207 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
210 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
216 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
217 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
218 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
219 	/* TA*/
220 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
222 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
225 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
227 	/* TCA*/
228 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
230 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
232 	/* TCC (5 sub-ranges)*/
233 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
234 	/* TCC range 0*/
235 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
241 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
242 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
245 	/* TCC range 1*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
248 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
249 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
250 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
251 	/* TCC range 2*/
252 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
254 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
255 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
257 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
259 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
260 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
261 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
262 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
263 	/* TCC range 3*/
264 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
266 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
267 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
268 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
269 	/* TCC range 4*/
270 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
271 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
272 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
273 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
274 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
275 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
276 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
277 	/* TCI*/
278 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
279 	/* TCP*/
280 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
282 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
283 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
285 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
287 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
289 	/* TD*/
290 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
292 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
293 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
295 	/* EA (3 sub-ranges)*/
296 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
297 	/* EA range 0*/
298 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
301 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
302 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
306 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
308 	/* EA range 1*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
311 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
312 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
318 	/* EA range 2*/
319 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
323 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
325 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
326 	/* UTC VM L2 bank*/
327 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
328 	/* UTC VM walker*/
329 	TA_RAS_BLOCK__UTC_VML2_WALKER,
330 	/* UTC ATC L2 2MB cache*/
331 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
332 	/* UTC ATC L2 4KB cache*/
333 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
334 	TA_RAS_BLOCK__GFX_MAX
335 };
336 
337 struct ras_gfx_subblock {
338 	unsigned char *name;
339 	int ta_subblock;
340 	int hw_supported_error_type;
341 	int sw_supported_error_type;
342 };
343 
344 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
345 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
346 		#subblock,                                                     \
347 		TA_RAS_BLOCK__##subblock,                                      \
348 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
349 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
350 	}
351 
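/*
 * The flag arguments a..d and e..h pack into the hw_supported_error_type
 * and sw_supported_error_type bitmasks; their bit positions appear to
 * follow the AMDGPU_RAS_ERROR__* error types (parity, single-correctable,
 * multi-uncorrectable, poison).  For example, the first entry below,
 * AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), expands
 * to hw_supported_error_type = 0 | (1 << 1) | (1 << 2) | (1 << 3) = 0xe
 * and sw_supported_error_type = (1 << 1) | (0 << 3) | 0 | (1 << 2) = 0x6.
 */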
352 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
370 			     0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
372 			     0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
381 			     0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
385 			     0, 0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
387 			     0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
389 			     0, 0),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391 			     0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
393 			     1),
394 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
395 			     0, 0, 0),
396 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
397 			     0),
398 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
399 			     0),
400 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
401 			     0),
402 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
403 			     0),
404 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405 			     0),
406 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
407 			     0, 0),
408 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
409 			     0),
410 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
411 			     0),
412 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
413 			     0, 0, 0),
414 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
415 			     0),
416 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417 			     0),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
419 			     0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
421 			     0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 			     0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
425 			     0, 0),
426 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
427 			     0),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
437 			     1),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
439 			     1),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
441 			     1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
443 			     0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
445 			     0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
461 			     0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
463 			     0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
465 			     0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
500 };
501 
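/*
 * "Golden" register settings.  Each SOC15_REG_GOLDEN_VALUE() entry names
 * an IP block, instance, register, an AND mask of the bits to update and
 * the value to OR in; soc15_program_register_sequence() applies them,
 * roughly as a read-modify-write (or a straight write when the mask is
 * 0xffffffff).
 */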
502 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
503 {
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
524 };
525 
526 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
527 {
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
546 };
547 
548 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
549 {
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
561 };
562 
563 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
564 {
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
589 };
590 
591 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
592 {
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
600 };
601 
602 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
603 {
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
623 };
624 
625 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
626 {
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22010042),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22010042),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
639 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 };
641 
642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
643 {
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 };
668 
669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
670 {
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
684 };
685 
686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
687 {
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
696 };
697 
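/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs
 * relative to instance 0, so the RLC save/restore list setup can program
 * the pairs in a simple loop.
 */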
698 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
699 {
700 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708 };
709 
710 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
711 {
712 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720 };
721 
722 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
723 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
724 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
725 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
726 
727 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
729 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
731 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
732                                  struct amdgpu_cu_info *cu_info);
733 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
734 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
735 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
736 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
737 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
738 					  void *ras_error_status);
739 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
740 				     void *inject_if);
741 
742 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
743 {
744 	switch (adev->asic_type) {
745 	case CHIP_VEGA10:
746 		soc15_program_register_sequence(adev,
747 						golden_settings_gc_9_0,
748 						ARRAY_SIZE(golden_settings_gc_9_0));
749 		soc15_program_register_sequence(adev,
750 						golden_settings_gc_9_0_vg10,
751 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
752 		break;
753 	case CHIP_VEGA12:
754 		soc15_program_register_sequence(adev,
755 						golden_settings_gc_9_2_1,
756 						ARRAY_SIZE(golden_settings_gc_9_2_1));
757 		soc15_program_register_sequence(adev,
758 						golden_settings_gc_9_2_1_vg12,
759 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
760 		break;
761 	case CHIP_VEGA20:
762 		soc15_program_register_sequence(adev,
763 						golden_settings_gc_9_0,
764 						ARRAY_SIZE(golden_settings_gc_9_0));
765 		soc15_program_register_sequence(adev,
766 						golden_settings_gc_9_0_vg20,
767 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
768 		break;
769 	case CHIP_ARCTURUS:
770 		soc15_program_register_sequence(adev,
771 						golden_settings_gc_9_4_1_arct,
772 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
773 		break;
774 	case CHIP_RAVEN:
775 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
776 						ARRAY_SIZE(golden_settings_gc_9_1));
777 		if (adev->rev_id >= 8)
778 			soc15_program_register_sequence(adev,
779 							golden_settings_gc_9_1_rv2,
780 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
781 		else
782 			soc15_program_register_sequence(adev,
783 							golden_settings_gc_9_1_rv1,
784 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
785 		break;
786 	case CHIP_RENOIR:
787 		soc15_program_register_sequence(adev,
788 						golden_settings_gc_9_1_rn,
789 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
790 		break;
791 	default:
792 		break;
793 	}
794 
795 	if (adev->asic_type != CHIP_ARCTURUS)
796 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
797 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
798 }
799 
800 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
801 {
802 	adev->gfx.scratch.num_reg = 8;
803 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
804 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
805 }
806 
807 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
808 				       bool wc, uint32_t reg, uint32_t val)
809 {
810 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
811 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
812 				WRITE_DATA_DST_SEL(0) |
813 				(wc ? WR_CONFIRM : 0));
814 	amdgpu_ring_write(ring, reg);
815 	amdgpu_ring_write(ring, 0);
816 	amdgpu_ring_write(ring, val);
817 }
818 
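/*
 * Emit a PACKET3_WAIT_REG_MEM that stalls the selected engine until the
 * value read from a register (mem_space == 0) or from the 64-bit address
 * addr0/addr1 (mem_space == 1), ANDed with 'mask', equals 'ref'; 'inv' is
 * the poll interval.
 */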
819 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
820 				  int mem_space, int opt, uint32_t addr0,
821 				  uint32_t addr1, uint32_t ref, uint32_t mask,
822 				  uint32_t inv)
823 {
824 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
825 	amdgpu_ring_write(ring,
826 				 /* memory (1) or register (0) */
827 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
828 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
829 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
830 				 WAIT_REG_MEM_ENGINE(eng_sel)));
831 
832 	if (mem_space)
833 		BUG_ON(addr0 & 0x3); /* Dword align */
834 	amdgpu_ring_write(ring, addr0);
835 	amdgpu_ring_write(ring, addr1);
836 	amdgpu_ring_write(ring, ref);
837 	amdgpu_ring_write(ring, mask);
838 	amdgpu_ring_write(ring, inv); /* poll interval */
839 }
840 
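/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD from the CPU,
 * submit a SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, then
 * poll for up to adev->usec_timeout microseconds for the new value.
 */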
841 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
842 {
843 	struct amdgpu_device *adev = ring->adev;
844 	uint32_t scratch;
845 	uint32_t tmp = 0;
846 	unsigned i;
847 	int r;
848 
849 	r = amdgpu_gfx_scratch_get(adev, &scratch);
850 	if (r)
851 		return r;
852 
853 	WREG32(scratch, 0xCAFEDEAD);
854 	r = amdgpu_ring_alloc(ring, 3);
855 	if (r)
856 		goto error_free_scratch;
857 
858 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
859 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
860 	amdgpu_ring_write(ring, 0xDEADBEEF);
861 	amdgpu_ring_commit(ring);
862 
863 	for (i = 0; i < adev->usec_timeout; i++) {
864 		tmp = RREG32(scratch);
865 		if (tmp == 0xDEADBEEF)
866 			break;
867 		udelay(1);
868 	}
869 
870 	if (i >= adev->usec_timeout)
871 		r = -ETIMEDOUT;
872 
873 error_free_scratch:
874 	amdgpu_gfx_scratch_free(adev, scratch);
875 	return r;
876 }
877 
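/*
 * Indirect buffer test: seed a write-back slot with 0xCAFEDEAD, submit a
 * small IB whose WRITE_DATA packet stores 0xDEADBEEF to that slot, wait
 * for the fence and check that the value landed.
 */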
878 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
879 {
880 	struct amdgpu_device *adev = ring->adev;
881 	struct amdgpu_ib ib;
882 	struct dma_fence *f = NULL;
883 
884 	unsigned index;
885 	uint64_t gpu_addr;
886 	uint32_t tmp;
887 	long r;
888 
889 	r = amdgpu_device_wb_get(adev, &index);
890 	if (r)
891 		return r;
892 
893 	gpu_addr = adev->wb.gpu_addr + (index * 4);
894 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
895 	memset(&ib, 0, sizeof(ib));
896 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
897 	if (r)
898 		goto err1;
899 
900 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
901 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
902 	ib.ptr[2] = lower_32_bits(gpu_addr);
903 	ib.ptr[3] = upper_32_bits(gpu_addr);
904 	ib.ptr[4] = 0xDEADBEEF;
905 	ib.length_dw = 5;
906 
907 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
908 	if (r)
909 		goto err2;
910 
911 	r = dma_fence_wait_timeout(f, false, timeout);
912 	if (r == 0) {
913 		r = -ETIMEDOUT;
914 		goto err2;
915 	} else if (r < 0) {
916 		goto err2;
917 	}
918 
919 	tmp = adev->wb.wb[index];
920 	if (tmp == 0xDEADBEEF)
921 		r = 0;
922 	else
923 		r = -EINVAL;
924 
925 err2:
926 	amdgpu_ib_free(adev, &ib, NULL);
927 	dma_fence_put(f);
928 err1:
929 	amdgpu_device_wb_free(adev, index);
930 	return r;
931 }
932 
933 
934 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
935 {
936 	release_firmware(adev->gfx.pfp_fw);
937 	adev->gfx.pfp_fw = NULL;
938 	release_firmware(adev->gfx.me_fw);
939 	adev->gfx.me_fw = NULL;
940 	release_firmware(adev->gfx.ce_fw);
941 	adev->gfx.ce_fw = NULL;
942 	release_firmware(adev->gfx.rlc_fw);
943 	adev->gfx.rlc_fw = NULL;
944 	release_firmware(adev->gfx.mec_fw);
945 	adev->gfx.mec_fw = NULL;
946 	release_firmware(adev->gfx.mec2_fw);
947 	adev->gfx.mec2_fw = NULL;
948 
949 	kfree(adev->gfx.rlc.register_list_format);
950 }
951 
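/*
 * Parse the v2.1 extensions of the RLC firmware header: versions, sizes
 * and payload pointers for the save/restore list CNTL, GPM and SRM blobs,
 * which are registered as separate ucode entries when the PSP loads
 * firmware.
 */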
952 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
953 {
954 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
955 
956 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
957 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
958 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
959 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
960 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
961 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
962 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
963 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
964 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
965 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
966 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
967 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
968 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
969 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
970 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
971 }
972 
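/*
 * Record whether the loaded ME/PFP and MEC firmware are new enough to
 * handle the WAIT_REG_MEM based write-then-wait sequence used elsewhere
 * in this file; older firmware falls back to separate write and wait
 * packets.
 */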
973 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
974 {
975 	adev->gfx.me_fw_write_wait = false;
976 	adev->gfx.mec_fw_write_wait = false;
977 
978 	switch (adev->asic_type) {
979 	case CHIP_VEGA10:
980 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
981 		    (adev->gfx.me_feature_version >= 42) &&
982 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
983 		    (adev->gfx.pfp_feature_version >= 42))
984 			adev->gfx.me_fw_write_wait = true;
985 
986 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
987 		    (adev->gfx.mec_feature_version >= 42))
988 			adev->gfx.mec_fw_write_wait = true;
989 		break;
990 	case CHIP_VEGA12:
991 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
992 		    (adev->gfx.me_feature_version >= 44) &&
993 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
994 		    (adev->gfx.pfp_feature_version >= 44))
995 			adev->gfx.me_fw_write_wait = true;
996 
997 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
998 		    (adev->gfx.mec_feature_version >= 44))
999 			adev->gfx.mec_fw_write_wait = true;
1000 		break;
1001 	case CHIP_VEGA20:
1002 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1003 		    (adev->gfx.me_feature_version >= 44) &&
1004 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1005 		    (adev->gfx.pfp_feature_version >= 44))
1006 			adev->gfx.me_fw_write_wait = true;
1007 
1008 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1009 		    (adev->gfx.mec_feature_version >= 44))
1010 			adev->gfx.mec_fw_write_wait = true;
1011 		break;
1012 	case CHIP_RAVEN:
1013 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1014 		    (adev->gfx.me_feature_version >= 42) &&
1015 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1016 		    (adev->gfx.pfp_feature_version >= 42))
1017 			adev->gfx.me_fw_write_wait = true;
1018 
1019 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1020 		    (adev->gfx.mec_feature_version >= 42))
1021 			adev->gfx.mec_fw_write_wait = true;
1022 		break;
1023 	default:
1024 		break;
1025 	}
1026 }
1027 
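/*
 * GFXOFF is only left enabled on early Raven (rev_id < 8 and not Picasso)
 * when the RLC firmware is known good: version 106 or >= 531 (but not the
 * 53815 build), feature version >= 1 and a v2.1 RLC image; otherwise
 * PP_GFXOFF_MASK is cleared.
 */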
1028 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1029 {
1030 	switch (adev->asic_type) {
1031 	case CHIP_VEGA10:
1032 	case CHIP_VEGA12:
1033 	case CHIP_VEGA20:
1034 		break;
1035 	case CHIP_RAVEN:
1036 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1037 			break;
1038 		if ((adev->gfx.rlc_fw_version != 106 &&
1039 		     adev->gfx.rlc_fw_version < 531) ||
1040 		    (adev->gfx.rlc_fw_version == 53815) ||
1041 		    (adev->gfx.rlc_feature_version < 1) ||
1042 		    !adev->gfx.rlc.is_rlc_v2_1)
1043 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1044 		break;
1045 	default:
1046 		break;
1047 	}
1048 }
1049 
1050 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1051 					  const char *chip_name)
1052 {
1053 	char fw_name[30];
1054 	int err;
1055 	struct amdgpu_firmware_info *info = NULL;
1056 	const struct common_firmware_header *header = NULL;
1057 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1058 
1059 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1060 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1061 	if (err)
1062 		goto out;
1063 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1064 	if (err)
1065 		goto out;
1066 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1067 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1068 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1069 
1070 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1071 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1072 	if (err)
1073 		goto out;
1074 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1075 	if (err)
1076 		goto out;
1077 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1078 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1079 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1080 
1081 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1082 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1083 	if (err)
1084 		goto out;
1085 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1086 	if (err)
1087 		goto out;
1088 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1089 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1090 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1091 
1092 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1093 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1094 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1095 		info->fw = adev->gfx.pfp_fw;
1096 		header = (const struct common_firmware_header *)info->fw->data;
1097 		adev->firmware.fw_size +=
1098 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1099 
1100 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1101 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1102 		info->fw = adev->gfx.me_fw;
1103 		header = (const struct common_firmware_header *)info->fw->data;
1104 		adev->firmware.fw_size +=
1105 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1106 
1107 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1108 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1109 		info->fw = adev->gfx.ce_fw;
1110 		header = (const struct common_firmware_header *)info->fw->data;
1111 		adev->firmware.fw_size +=
1112 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1113 	}
1114 
1115 out:
1116 	if (err) {
1117 		dev_err(adev->dev,
1118 			"gfx9: Failed to load firmware \"%s\"\n",
1119 			fw_name);
1120 		release_firmware(adev->gfx.pfp_fw);
1121 		adev->gfx.pfp_fw = NULL;
1122 		release_firmware(adev->gfx.me_fw);
1123 		adev->gfx.me_fw = NULL;
1124 		release_firmware(adev->gfx.ce_fw);
1125 		adev->gfx.ce_fw = NULL;
1126 	}
1127 	return err;
1128 }
1129 
1130 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1131 					  const char *chip_name)
1132 {
1133 	char fw_name[30];
1134 	int err;
1135 	struct amdgpu_firmware_info *info = NULL;
1136 	const struct common_firmware_header *header = NULL;
1137 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1138 	unsigned int *tmp = NULL;
1139 	unsigned int i = 0;
1140 	uint16_t version_major;
1141 	uint16_t version_minor;
1142 	uint32_t smu_version;
1143 
1144 	/*
1145 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1146 	 * instead of picasso_rlc.bin.
1147 	 * How to tell them apart:
1148 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1149 	 *          or revision >= 0xD8 && revision <= 0xDF
1150 	 * otherwise it is PCO FP5
1151 	 */
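	/*
	 * For example, PCI revision 0xC9 falls in the AM4 range and selects
	 * picasso_rlc_am4.bin, while revision 0xD0 falls outside both ranges
	 * and selects the FP5 picasso_rlc.bin.
	 */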
1152 	if (!strcmp(chip_name, "picasso") &&
1153 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1154 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1155 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1156 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1157 		(smu_version >= 0x41e2b))
1158 		/*
1159 		 * SMC is loaded by SBIOS on APUs, so the SMU version can be queried directly.
1160 		 */
1161 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1162 	else
1163 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1164 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1165 	if (err)
1166 		goto out;
1167 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1168 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1169 
1170 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1171 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1172 	if (version_major == 2 && version_minor == 1)
1173 		adev->gfx.rlc.is_rlc_v2_1 = true;
1174 
1175 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1176 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1177 	adev->gfx.rlc.save_and_restore_offset =
1178 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1179 	adev->gfx.rlc.clear_state_descriptor_offset =
1180 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1181 	adev->gfx.rlc.avail_scratch_ram_locations =
1182 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1183 	adev->gfx.rlc.reg_restore_list_size =
1184 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1185 	adev->gfx.rlc.reg_list_format_start =
1186 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1187 	adev->gfx.rlc.reg_list_format_separate_start =
1188 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1189 	adev->gfx.rlc.starting_offsets_start =
1190 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1191 	adev->gfx.rlc.reg_list_format_size_bytes =
1192 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1193 	adev->gfx.rlc.reg_list_size_bytes =
1194 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1195 	adev->gfx.rlc.register_list_format =
1196 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1197 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1198 	if (!adev->gfx.rlc.register_list_format) {
1199 		err = -ENOMEM;
1200 		goto out;
1201 	}
1202 
1203 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1204 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1205 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1206 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1207 
1208 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1209 
1210 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1211 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1212 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1213 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1214 
1215 	if (adev->gfx.rlc.is_rlc_v2_1)
1216 		gfx_v9_0_init_rlc_ext_microcode(adev);
1217 
1218 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1219 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1220 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1221 		info->fw = adev->gfx.rlc_fw;
1222 		header = (const struct common_firmware_header *)info->fw->data;
1223 		adev->firmware.fw_size +=
1224 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1225 
1226 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1227 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1228 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1229 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1230 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1231 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1232 			info->fw = adev->gfx.rlc_fw;
1233 			adev->firmware.fw_size +=
1234 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1235 
1236 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1237 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1238 			info->fw = adev->gfx.rlc_fw;
1239 			adev->firmware.fw_size +=
1240 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1241 
1242 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1243 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1244 			info->fw = adev->gfx.rlc_fw;
1245 			adev->firmware.fw_size +=
1246 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1247 		}
1248 	}
1249 
1250 out:
1251 	if (err) {
1252 		dev_err(adev->dev,
1253 			"gfx9: Failed to load firmware \"%s\"\n",
1254 			fw_name);
1255 		release_firmware(adev->gfx.rlc_fw);
1256 		adev->gfx.rlc_fw = NULL;
1257 	}
1258 	return err;
1259 }
1260 
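/*
 * Request and validate the compute (MEC) microcode for @chip_name and
 * record its version/feature numbers; the optional MEC2 image is handled
 * the same way when present.  With PSP firmware loading, the MEC ucode
 * and jump-table entries are registered and counted towards the total
 * firmware size.
 */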
1261 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1262 					  const char *chip_name)
1263 {
1264 	char fw_name[30];
1265 	int err;
1266 	struct amdgpu_firmware_info *info = NULL;
1267 	const struct common_firmware_header *header = NULL;
1268 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1269 
1270 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1271 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1272 	if (err)
1273 		goto out;
1274 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1275 	if (err)
1276 		goto out;
1277 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1278 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1279 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1280 
1281 
1282 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1283 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1284 	if (!err) {
1285 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1286 		if (err)
1287 			goto out;
1288 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1289 			adev->gfx.mec2_fw->data;
1290 		adev->gfx.mec2_fw_version =
1291 			le32_to_cpu(cp_hdr->header.ucode_version);
1292 		adev->gfx.mec2_feature_version =
1293 			le32_to_cpu(cp_hdr->ucode_feature_version);
1294 	} else {
1295 		err = 0;
1296 		adev->gfx.mec2_fw = NULL;
1297 	}
1298 
1299 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1300 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1301 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1302 		info->fw = adev->gfx.mec_fw;
1303 		header = (const struct common_firmware_header *)info->fw->data;
1304 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1305 		adev->firmware.fw_size +=
1306 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1307 
1308 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1309 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1310 		info->fw = adev->gfx.mec_fw;
1311 		adev->firmware.fw_size +=
1312 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1313 
1314 		if (adev->gfx.mec2_fw) {
1315 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1316 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1317 			info->fw = adev->gfx.mec2_fw;
1318 			header = (const struct common_firmware_header *)info->fw->data;
1319 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1320 			adev->firmware.fw_size +=
1321 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1322 
1323 			/* TODO: Determine if MEC2 JT FW loading can be removed
1324 				 for all GFX v9 ASICs and above */
1325 			if (adev->asic_type != CHIP_ARCTURUS) {
1326 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1327 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1328 				info->fw = adev->gfx.mec2_fw;
1329 				adev->firmware.fw_size +=
1330 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1331 					PAGE_SIZE);
1332 			}
1333 		}
1334 	}
1335 
1336 out:
1337 	gfx_v9_0_check_if_need_gfxoff(adev);
1338 	gfx_v9_0_check_fw_write_wait(adev);
1339 	if (err) {
1340 		dev_err(adev->dev,
1341 			"gfx9: Failed to load firmware \"%s\"\n",
1342 			fw_name);
1343 		release_firmware(adev->gfx.mec_fw);
1344 		adev->gfx.mec_fw = NULL;
1345 		release_firmware(adev->gfx.mec2_fw);
1346 		adev->gfx.mec2_fw = NULL;
1347 	}
1348 	return err;
1349 }
1350 
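/*
 * Top-level microcode init: map the ASIC type to a firmware name prefix,
 * then load the CP gfx microcode (skipped on Arcturus, which has no CPG),
 * the RLC microcode and the CP compute microcode in turn.
 */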
1351 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1352 {
1353 	const char *chip_name;
1354 	int r;
1355 
1356 	DRM_DEBUG("\n");
1357 
1358 	switch (adev->asic_type) {
1359 	case CHIP_VEGA10:
1360 		chip_name = "vega10";
1361 		break;
1362 	case CHIP_VEGA12:
1363 		chip_name = "vega12";
1364 		break;
1365 	case CHIP_VEGA20:
1366 		chip_name = "vega20";
1367 		break;
1368 	case CHIP_RAVEN:
1369 		if (adev->rev_id >= 8)
1370 			chip_name = "raven2";
1371 		else if (adev->pdev->device == 0x15d8)
1372 			chip_name = "picasso";
1373 		else
1374 			chip_name = "raven";
1375 		break;
1376 	case CHIP_ARCTURUS:
1377 		chip_name = "arcturus";
1378 		break;
1379 	case CHIP_RENOIR:
1380 		chip_name = "renoir";
1381 		break;
1382 	default:
1383 		BUG();
1384 	}
1385 
1386 	/* No CPG in Arcturus */
1387 	if (adev->asic_type != CHIP_ARCTURUS) {
1388 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1389 		if (r)
1390 			return r;
1391 	}
1392 
1393 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1394 	if (r)
1395 		return r;
1396 
1397 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1398 	if (r)
1399 		return r;
1400 
1401 	return r;
1402 }
1403 
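/*
 * Clear-state buffer size in dwords: the begin/end preamble and context
 * control packets, one SET_CONTEXT_REG packet per extent in gfx9_cs_data,
 * and the trailing CLEAR_STATE packet.
 */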
1404 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1405 {
1406 	u32 count = 0;
1407 	const struct cs_section_def *sect = NULL;
1408 	const struct cs_extent_def *ext = NULL;
1409 
1410 	/* begin clear state */
1411 	count += 2;
1412 	/* context control state */
1413 	count += 3;
1414 
1415 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1416 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1417 			if (sect->id == SECT_CONTEXT)
1418 				count += 2 + ext->reg_count;
1419 			else
1420 				return 0;
1421 		}
1422 	}
1423 
1424 	/* end clear state */
1425 	count += 2;
1426 	/* clear state */
1427 	count += 2;
1428 
1429 	return count;
1430 }
1431 
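/* Emit the clear-state packet stream sized by gfx_v9_0_get_csb_size(). */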
1432 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1433 				    volatile u32 *buffer)
1434 {
1435 	u32 count = 0, i;
1436 	const struct cs_section_def *sect = NULL;
1437 	const struct cs_extent_def *ext = NULL;
1438 
1439 	if (adev->gfx.rlc.cs_data == NULL)
1440 		return;
1441 	if (buffer == NULL)
1442 		return;
1443 
1444 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1445 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1446 
1447 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1448 	buffer[count++] = cpu_to_le32(0x80000000);
1449 	buffer[count++] = cpu_to_le32(0x80000000);
1450 
1451 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1452 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1453 			if (sect->id == SECT_CONTEXT) {
1454 				buffer[count++] =
1455 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1456 				buffer[count++] = cpu_to_le32(ext->reg_index -
1457 						PACKET3_SET_CONTEXT_REG_START);
1458 				for (i = 0; i < ext->reg_count; i++)
1459 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1460 			} else {
1461 				return;
1462 			}
1463 		}
1464 	}
1465 
1466 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1467 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1468 
1469 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1470 	buffer[count++] = cpu_to_le32(0);
1471 }
1472 
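/*
 * Program the per-SE/SH always-on CU masks used for power gating: the
 * first pg_always_on_cu_num (2) CUs go into RLC_PG_ALWAYS_ON_CU_MASK and
 * the first always_on_cu_num CUs (4 on APUs, 8 on Vega12, 12 otherwise)
 * into RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 */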
1473 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1474 {
1475 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1476 	uint32_t pg_always_on_cu_num = 2;
1477 	uint32_t always_on_cu_num;
1478 	uint32_t i, j, k;
1479 	uint32_t mask, cu_bitmap, counter;
1480 
1481 	if (adev->flags & AMD_IS_APU)
1482 		always_on_cu_num = 4;
1483 	else if (adev->asic_type == CHIP_VEGA12)
1484 		always_on_cu_num = 8;
1485 	else
1486 		always_on_cu_num = 12;
1487 
1488 	mutex_lock(&adev->grbm_idx_mutex);
1489 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1490 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1491 			mask = 1;
1492 			cu_bitmap = 0;
1493 			counter = 0;
1494 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1495 
1496 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1497 				if (cu_info->bitmap[i][j] & mask) {
1498 					if (counter == pg_always_on_cu_num)
1499 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1500 					if (counter < always_on_cu_num)
1501 						cu_bitmap |= mask;
1502 					else
1503 						break;
1504 					counter++;
1505 				}
1506 				mask <<= 1;
1507 			}
1508 
1509 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1510 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1511 		}
1512 	}
1513 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1514 	mutex_unlock(&adev->grbm_idx_mutex);
1515 }
1516 
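/*
 * LBPW (presumably "load balance per watt") setup: program the RLC
 * load-balance threshold, counter and parameter registers, then the
 * always-on CU masks.  The v9_4 variant below differs only in a few
 * threshold and counter values.
 */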
1517 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1518 {
1519 	uint32_t data;
1520 
1521 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1522 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1523 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1524 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1525 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1526 
1527 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1528 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1529 
1530 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1531 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1532 
1533 	mutex_lock(&adev->grbm_idx_mutex);
1534 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1535 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1536 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1537 
1538 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1539 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1540 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1541 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1542 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1543 
1544 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1545 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1546 	data &= 0x0000FFFF;
1547 	data |= 0x00C00000;
1548 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1549 
1550 	/*
1551 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1552 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1553 	 */
1554 
1555 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1556 	 * but used for RLC_LB_CNTL configuration */
1557 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1558 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1559 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1560 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1561 	mutex_unlock(&adev->grbm_idx_mutex);
1562 
1563 	gfx_v9_0_init_always_on_cu_mask(adev);
1564 }
1565 
1566 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1567 {
1568 	uint32_t data;
1569 
1570 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1571 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1572 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1573 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1574 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1575 
1576 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1577 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1578 
1579 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1580 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1581 
1582 	mutex_lock(&adev->grbm_idx_mutex);
1583 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1584 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1585 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1586 
1587 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1588 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1589 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1590 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1591 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1592 
1593 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1594 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1595 	data &= 0x0000FFFF;
1596 	data |= 0x00C00000;
1597 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1598 
1599 	/*
1600 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1601 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1602 	 */
1603 
1604 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1605 	 * but used for RLC_LB_CNTL configuration */
1606 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1607 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1608 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1609 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1610 	mutex_unlock(&adev->grbm_idx_mutex);
1611 
1612 	gfx_v9_0_init_always_on_cu_mask(adev);
1613 }
1614 
1615 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1616 {
1617 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1618 }
1619 
1620 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1621 {
1622 	return 5;
1623 }
1624 
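/*
 * RLC software init: create the clear-state buffer, the CP jump table
 * for Raven/Renoir, and program LBPW on the parts that support it.
 */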
1625 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1626 {
1627 	const struct cs_section_def *cs_data;
1628 	int r;
1629 
1630 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1631 
1632 	cs_data = adev->gfx.rlc.cs_data;
1633 
1634 	if (cs_data) {
1635 		/* init clear state block */
1636 		r = amdgpu_gfx_rlc_init_csb(adev);
1637 		if (r)
1638 			return r;
1639 	}
1640 
1641 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1642 		/* TODO: double check the cp_table_size for RV */
1643 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1644 		r = amdgpu_gfx_rlc_init_cpt(adev);
1645 		if (r)
1646 			return r;
1647 	}
1648 
1649 	switch (adev->asic_type) {
1650 	case CHIP_RAVEN:
1651 	case CHIP_RENOIR:
1652 		gfx_v9_0_init_lbpw(adev);
1653 		break;
1654 	case CHIP_VEGA20:
1655 		gfx_v9_4_init_lbpw(adev);
1656 		break;
1657 	default:
1658 		break;
1659 	}
1660 
1661 	return 0;
1662 }
1663 
1664 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1665 {
1666 	int r;
1667 
1668 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1669 	if (unlikely(r != 0))
1670 		return r;
1671 
1672 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1673 			AMDGPU_GEM_DOMAIN_VRAM);
1674 	if (!r)
1675 		adev->gfx.rlc.clear_state_gpu_addr =
1676 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1677 
1678 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1679 
1680 	return r;
1681 }
1682 
1683 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1684 {
1685 	int r;
1686 
1687 	if (!adev->gfx.rlc.clear_state_obj)
1688 		return;
1689 
1690 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1691 	if (likely(r == 0)) {
1692 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1693 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1694 	}
1695 }
1696 
1697 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1698 {
1699 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1700 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1701 }
1702 
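/*
 * Allocate the MEC HPD/EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot per
 * acquired compute ring) and stage the MEC microcode in a GTT buffer
 * (mec_fw_obj) for firmware loading.
 */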
1703 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1704 {
1705 	int r;
1706 	u32 *hpd;
1707 	const __le32 *fw_data;
1708 	unsigned fw_size;
1709 	u32 *fw;
1710 	size_t mec_hpd_size;
1711 
1712 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1713 
1714 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1715 
1716 	/* take ownership of the relevant compute queues */
1717 	amdgpu_gfx_compute_queue_acquire(adev);
1718 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1719 
1720 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1721 				      AMDGPU_GEM_DOMAIN_VRAM,
1722 				      &adev->gfx.mec.hpd_eop_obj,
1723 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1724 				      (void **)&hpd);
1725 	if (r) {
1726 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1727 		gfx_v9_0_mec_fini(adev);
1728 		return r;
1729 	}
1730 
1731 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1732 
1733 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1734 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1735 
1736 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1737 
1738 	fw_data = (const __le32 *)
1739 		(adev->gfx.mec_fw->data +
1740 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1741 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1742 
1743 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1744 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1745 				      &adev->gfx.mec.mec_fw_obj,
1746 				      &adev->gfx.mec.mec_fw_gpu_addr,
1747 				      (void **)&fw);
1748 	if (r) {
1749 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1750 		gfx_v9_0_mec_fini(adev);
1751 		return r;
1752 	}
1753 
1754 	memcpy(fw, fw_data, fw_size);
1755 
1756 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1757 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1758 
1759 	return 0;
1760 }
1761 
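/*
 * Wave debug helpers: SQ_IND_INDEX/SQ_IND_DATA provide indirect access to
 * per-wave state (status, PC, EXEC mask, GPR/LDS allocation, ...) for the
 * read_wave_* callbacks below.
 */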
1762 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1763 {
1764 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1765 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1766 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1767 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1768 		(SQ_IND_INDEX__FORCE_READ_MASK));
1769 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1770 }
1771 
1772 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1773 			   uint32_t wave, uint32_t thread,
1774 			   uint32_t regno, uint32_t num, uint32_t *out)
1775 {
1776 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1777 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1778 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1779 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1780 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1781 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1782 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1783 	while (num--)
1784 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1785 }
1786 
1787 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1788 {
1789 	/* type 1 wave data */
1790 	dst[(*no_fields)++] = 1;
1791 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1792 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1799 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1800 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1801 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1802 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1803 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1804 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1805 }
1806 
1807 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1808 				     uint32_t wave, uint32_t start,
1809 				     uint32_t size, uint32_t *dst)
1810 {
1811 	wave_read_regs(
1812 		adev, simd, wave, 0,
1813 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1814 }
1815 
1816 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1817 				     uint32_t wave, uint32_t thread,
1818 				     uint32_t start, uint32_t size,
1819 				     uint32_t *dst)
1820 {
1821 	wave_read_regs(
1822 		adev, simd, wave, thread,
1823 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1824 }
1825 
1826 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1827 				  u32 me, u32 pipe, u32 q, u32 vm)
1828 {
1829 	soc15_grbm_select(adev, me, pipe, q, vm);
1830 }
1831 
1832 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1833 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1834 	.select_se_sh = &gfx_v9_0_select_se_sh,
1835 	.read_wave_data = &gfx_v9_0_read_wave_data,
1836 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1837 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1838 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1839 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1840 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1841 };
1842 
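/*
 * Early per-ASIC gfx configuration: fixed FIFO sizes, the golden (or
 * read-back and patched) GB_ADDR_CONFIG value, and the pipe/bank/SE
 * counts decoded from it.
 */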
1843 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1844 {
1845 	u32 gb_addr_config;
1846 	int err;
1847 
1848 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1849 
1850 	switch (adev->asic_type) {
1851 	case CHIP_VEGA10:
1852 		adev->gfx.config.max_hw_contexts = 8;
1853 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1857 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1858 		break;
1859 	case CHIP_VEGA12:
1860 		adev->gfx.config.max_hw_contexts = 8;
1861 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1862 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1863 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1864 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1865 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1866 		DRM_INFO("fix gfx.config for vega12\n");
1867 		break;
1868 	case CHIP_VEGA20:
1869 		adev->gfx.config.max_hw_contexts = 8;
1870 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1871 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1872 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1873 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1874 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1875 		gb_addr_config &= ~0xf3e777ff;
1876 		gb_addr_config |= 0x22014042;
1877 		/* check vbios table if gpu info is not available */
1878 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1879 		if (err)
1880 			return err;
1881 		break;
1882 	case CHIP_RAVEN:
1883 		adev->gfx.config.max_hw_contexts = 8;
1884 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1888 		if (adev->rev_id >= 8)
1889 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1890 		else
1891 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1892 		break;
1893 	case CHIP_ARCTURUS:
1894 		adev->gfx.config.max_hw_contexts = 8;
1895 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1896 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1897 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1898 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1899 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1900 		gb_addr_config &= ~0xf3e777ff;
1901 		gb_addr_config |= 0x22014042;
1902 		break;
1903 	case CHIP_RENOIR:
1904 		adev->gfx.config.max_hw_contexts = 8;
1905 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1908 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1909 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1910 		gb_addr_config &= ~0xf3e777ff;
1911 		gb_addr_config |= 0x22010042;
1912 		break;
1913 	default:
1914 		BUG();
1915 		break;
1916 	}
1917 
1918 	adev->gfx.config.gb_addr_config = gb_addr_config;
1919 
1920 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1921 			REG_GET_FIELD(
1922 					adev->gfx.config.gb_addr_config,
1923 					GB_ADDR_CONFIG,
1924 					NUM_PIPES);
1925 
1926 	adev->gfx.config.max_tile_pipes =
1927 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1928 
1929 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1930 			REG_GET_FIELD(
1931 					adev->gfx.config.gb_addr_config,
1932 					GB_ADDR_CONFIG,
1933 					NUM_BANKS);
1934 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1935 			REG_GET_FIELD(
1936 					adev->gfx.config.gb_addr_config,
1937 					GB_ADDR_CONFIG,
1938 					MAX_COMPRESSED_FRAGS);
1939 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1940 			REG_GET_FIELD(
1941 					adev->gfx.config.gb_addr_config,
1942 					GB_ADDR_CONFIG,
1943 					NUM_RB_PER_SE);
1944 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1945 			REG_GET_FIELD(
1946 					adev->gfx.config.gb_addr_config,
1947 					GB_ADDR_CONFIG,
1948 					NUM_SHADER_ENGINES);
1949 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1950 			REG_GET_FIELD(
1951 					adev->gfx.config.gb_addr_config,
1952 					GB_ADDR_CONFIG,
1953 					PIPE_INTERLEAVE_SIZE));
1954 
1955 	return 0;
1956 }
1957 
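/*
 * NGG (next generation geometry) buffer helper: allocate one VRAM buffer
 * sized per shader engine, using the caller-supplied per-SE size or the
 * given default when that is zero.
 */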
1958 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1959 				   struct amdgpu_ngg_buf *ngg_buf,
1960 				   int size_se,
1961 				   int default_size_se)
1962 {
1963 	int r;
1964 
1965 	if (size_se < 0) {
1966 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1967 		return -EINVAL;
1968 	}
1969 	size_se = size_se ? size_se : default_size_se;
1970 
1971 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1972 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1973 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1974 				    &ngg_buf->bo,
1975 				    &ngg_buf->gpu_addr,
1976 				    NULL);
1977 	if (r) {
1978 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1979 		return r;
1980 	}
1981 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1982 
1983 	return r;
1984 }
1985 
1986 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1987 {
1988 	int i;
1989 
1990 	for (i = 0; i < NGG_BUF_MAX; i++)
1991 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1992 				      &adev->gfx.ngg.buf[i].gpu_addr,
1993 				      NULL);
1994 
1995 	memset(&adev->gfx.ngg.buf[0], 0,
1996 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1997 
1998 	adev->gfx.ngg.init = false;
1999 
2000 	return 0;
2001 }
2002 
2003 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2004 {
2005 	int r;
2006 
2007 	if (!amdgpu_ngg || adev->gfx.ngg.init)
2008 		return 0;
2009 
2010 	/* GDS reserve memory: 64 bytes alignment */
2011 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2012 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2013 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2014 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2015 
2016 	/* Primitive Buffer */
2017 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2018 				    amdgpu_prim_buf_per_se,
2019 				    64 * 1024);
2020 	if (r) {
2021 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2022 		goto err;
2023 	}
2024 
2025 	/* Position Buffer */
2026 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2027 				    amdgpu_pos_buf_per_se,
2028 				    256 * 1024);
2029 	if (r) {
2030 		dev_err(adev->dev, "Failed to create Position Buffer\n");
2031 		goto err;
2032 	}
2033 
2034 	/* Control Sideband */
2035 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2036 				    amdgpu_cntl_sb_buf_per_se,
2037 				    256);
2038 	if (r) {
2039 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2040 		goto err;
2041 	}
2042 
2043 	/* Parameter Cache, not created by default */
2044 	if (amdgpu_param_buf_per_se <= 0)
2045 		goto out;
2046 
2047 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2048 				    amdgpu_param_buf_per_se,
2049 				    512 * 1024);
2050 	if (r) {
2051 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2052 		goto err;
2053 	}
2054 
2055 out:
2056 	adev->gfx.ngg.init = true;
2057 	return 0;
2058 err:
2059 	gfx_v9_0_ngg_fini(adev);
2060 	return r;
2061 }
2062 
2063 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2064 {
2065 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2066 	int r;
2067 	u32 data, base;
2068 
2069 	if (!amdgpu_ngg)
2070 		return 0;
2071 
2072 	/* Program buffer size */
2073 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2074 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2075 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2076 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2077 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2078 
2079 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2080 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2081 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2082 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2083 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2084 
2085 	/* Program buffer base address */
2086 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2087 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2088 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2089 
2090 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2091 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2092 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2093 
2094 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2095 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2096 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2097 
2098 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2099 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2100 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2101 
2102 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2103 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2104 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2105 
2106 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2107 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2108 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2109 
2110 	/* Clear GDS reserved memory */
2111 	r = amdgpu_ring_alloc(ring, 17);
2112 	if (r) {
2113 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2114 			  ring->name, r);
2115 		return r;
2116 	}
2117 
2118 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2119 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2120 			           (adev->gds.gds_size +
2121 				    adev->gfx.ngg.gds_reserve_size));
2122 
2123 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2124 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2125 				PACKET3_DMA_DATA_DST_SEL(1) |
2126 				PACKET3_DMA_DATA_SRC_SEL(2)));
2127 	amdgpu_ring_write(ring, 0);
2128 	amdgpu_ring_write(ring, 0);
2129 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2130 	amdgpu_ring_write(ring, 0);
2131 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2132 				adev->gfx.ngg.gds_reserve_size);
2133 
2134 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2135 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2136 
2137 	amdgpu_ring_commit(ring);
2138 
2139 	return 0;
2140 }
2141 
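/*
 * Set up one compute ring: map (mec, pipe, queue) to a doorbell index and
 * an EOP slot in the HPD buffer, and attach it to the matching MEC pipe
 * EOP interrupt source.
 */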
2142 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2143 				      int mec, int pipe, int queue)
2144 {
2145 	int r;
2146 	unsigned irq_type;
2147 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2148 
2150 
2151 	/* mec0 is me1 */
2152 	ring->me = mec + 1;
2153 	ring->pipe = pipe;
2154 	ring->queue = queue;
2155 
2156 	ring->ring_obj = NULL;
2157 	ring->use_doorbell = true;
2158 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2159 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2160 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2161 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162 
2163 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165 		+ ring->pipe;
2166 
2167 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2168 	r = amdgpu_ring_init(adev, ring, 1024,
2169 			     &adev->gfx.eop_irq, irq_type);
2170 	if (r)
2171 		return r;
2172 
2173 
2174 	return 0;
2175 }
2176 
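/*
 * sw_init: register the CP interrupt sources, load the microcode, create
 * the RLC and MEC BOs, then bring up the gfx ring, the compute rings
 * (allocated horizontally across pipes), the KIQ and the MQD backing
 * store before the early gfx config and NGG init.
 */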
2177 static int gfx_v9_0_sw_init(void *handle)
2178 {
2179 	int i, j, k, r, ring_id;
2180 	struct amdgpu_ring *ring;
2181 	struct amdgpu_kiq *kiq;
2182 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2183 
2184 	switch (adev->asic_type) {
2185 	case CHIP_VEGA10:
2186 	case CHIP_VEGA12:
2187 	case CHIP_VEGA20:
2188 	case CHIP_RAVEN:
2189 	case CHIP_ARCTURUS:
2190 	case CHIP_RENOIR:
2191 		adev->gfx.mec.num_mec = 2;
2192 		break;
2193 	default:
2194 		adev->gfx.mec.num_mec = 1;
2195 		break;
2196 	}
2197 
2198 	adev->gfx.mec.num_pipe_per_mec = 4;
2199 	adev->gfx.mec.num_queue_per_pipe = 8;
2200 
2201 	/* EOP Event */
2202 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2203 	if (r)
2204 		return r;
2205 
2206 	/* Privileged reg */
2207 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2208 			      &adev->gfx.priv_reg_irq);
2209 	if (r)
2210 		return r;
2211 
2212 	/* Privileged inst */
2213 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2214 			      &adev->gfx.priv_inst_irq);
2215 	if (r)
2216 		return r;
2217 
2218 	/* ECC error */
2219 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2220 			      &adev->gfx.cp_ecc_error_irq);
2221 	if (r)
2222 		return r;
2223 
2224 	/* FUE error */
2225 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2226 			      &adev->gfx.cp_ecc_error_irq);
2227 	if (r)
2228 		return r;
2229 
2230 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2231 
2232 	gfx_v9_0_scratch_init(adev);
2233 
2234 	r = gfx_v9_0_init_microcode(adev);
2235 	if (r) {
2236 		DRM_ERROR("Failed to load gfx firmware!\n");
2237 		return r;
2238 	}
2239 
2240 	r = adev->gfx.rlc.funcs->init(adev);
2241 	if (r) {
2242 		DRM_ERROR("Failed to init rlc BOs!\n");
2243 		return r;
2244 	}
2245 
2246 	r = gfx_v9_0_mec_init(adev);
2247 	if (r) {
2248 		DRM_ERROR("Failed to init MEC BOs!\n");
2249 		return r;
2250 	}
2251 
2252 	/* set up the gfx ring */
2253 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2254 		ring = &adev->gfx.gfx_ring[i];
2255 		ring->ring_obj = NULL;
2256 		if (!i)
2257 			sprintf(ring->name, "gfx");
2258 		else
2259 			sprintf(ring->name, "gfx_%d", i);
2260 		ring->use_doorbell = true;
2261 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2262 		r = amdgpu_ring_init(adev, ring, 1024,
2263 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2264 		if (r)
2265 			return r;
2266 	}
2267 
2268 	/* set up the compute queues - allocate horizontally across pipes */
2269 	ring_id = 0;
2270 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2271 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2272 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2273 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2274 					continue;
2275 
2276 				r = gfx_v9_0_compute_ring_init(adev,
2277 							       ring_id,
2278 							       i, k, j);
2279 				if (r)
2280 					return r;
2281 
2282 				ring_id++;
2283 			}
2284 		}
2285 	}
2286 
2287 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2288 	if (r) {
2289 		DRM_ERROR("Failed to init KIQ BOs!\n");
2290 		return r;
2291 	}
2292 
2293 	kiq = &adev->gfx.kiq;
2294 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2295 	if (r)
2296 		return r;
2297 
2298 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2299 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2300 	if (r)
2301 		return r;
2302 
2303 	adev->gfx.ce_ram_size = 0x8000;
2304 
2305 	r = gfx_v9_0_gpu_early_init(adev);
2306 	if (r)
2307 		return r;
2308 
2309 	r = gfx_v9_0_ngg_init(adev);
2310 	if (r)
2311 		return r;
2312 
2313 	return 0;
2314 }
2315 
2316 
2317 static int gfx_v9_0_sw_fini(void *handle)
2318 {
2319 	int i;
2320 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2321 
2322 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2323 			adev->gfx.ras_if) {
2324 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2325 		struct ras_ih_if ih_info = {
2326 			.head = *ras_if,
2327 		};
2328 
2329 		amdgpu_ras_debugfs_remove(adev, ras_if);
2330 		amdgpu_ras_sysfs_remove(adev, ras_if);
2331 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
2332 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2333 		kfree(ras_if);
2334 	}
2335 
2336 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2337 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2338 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2339 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2340 
2341 	amdgpu_gfx_mqd_sw_fini(adev);
2342 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2343 	amdgpu_gfx_kiq_fini(adev);
2344 
2345 	gfx_v9_0_mec_fini(adev);
2346 	gfx_v9_0_ngg_fini(adev);
2347 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2348 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2349 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2350 				&adev->gfx.rlc.cp_table_gpu_addr,
2351 				(void **)&adev->gfx.rlc.cp_table_ptr);
2352 	}
2353 	gfx_v9_0_free_microcode(adev);
2354 
2355 	return 0;
2356 }
2357 
2358 
2359 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2360 {
2361 	/* TODO */
2362 }
2363 
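/* Steer register access to one SE/SH/instance, or broadcast on 0xffffffff. */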
2364 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2365 {
2366 	u32 data;
2367 
2368 	if (instance == 0xffffffff)
2369 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2370 	else
2371 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2372 
2373 	if (se_num == 0xffffffff)
2374 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2375 	else
2376 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2377 
2378 	if (sh_num == 0xffffffff)
2379 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2380 	else
2381 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2382 
2383 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2384 }
2385 
2386 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2387 {
2388 	u32 data, mask;
2389 
2390 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2391 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2392 
2393 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2394 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2395 
2396 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2397 					 adev->gfx.config.max_sh_per_se);
2398 
2399 	return (~data) & mask;
2400 }
2401 
2402 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2403 {
2404 	int i, j;
2405 	u32 data;
2406 	u32 active_rbs = 0;
2407 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2408 					adev->gfx.config.max_sh_per_se;
2409 
2410 	mutex_lock(&adev->grbm_idx_mutex);
2411 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2412 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2413 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2414 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2415 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2416 					       rb_bitmap_width_per_sh);
2417 		}
2418 	}
2419 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2420 	mutex_unlock(&adev->grbm_idx_mutex);
2421 
2422 	adev->gfx.config.backend_enable_mask = active_rbs;
2423 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2424 }
2425 
2426 #define DEFAULT_SH_MEM_BASES	(0x6000)
2427 #define FIRST_COMPUTE_VMID	(8)
2428 #define LAST_COMPUTE_VMID	(16)
2429 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2430 {
2431 	int i;
2432 	uint32_t sh_mem_config;
2433 	uint32_t sh_mem_bases;
2434 
2435 	/*
2436 	 * Configure apertures:
2437 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2438 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2439 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2440 	 */
2441 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2442 
2443 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2444 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2445 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2446 
2447 	mutex_lock(&adev->srbm_mutex);
2448 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2449 		soc15_grbm_select(adev, 0, 0, 0, i);
2450 		/* CP and shaders */
2451 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2452 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2453 	}
2454 	soc15_grbm_select(adev, 0, 0, 0, 0);
2455 	mutex_unlock(&adev->srbm_mutex);
2456 
2457 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2458 	   access. These should be enabled by FW for target VMIDs. */
2459 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2460 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2461 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2462 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2463 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2464 	}
2465 }
2466 
2467 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2468 {
2469 	int vmid;
2470 
2471 	/*
2472 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2473 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2474 	 * the driver can enable them for graphics. VMID0 should maintain
2475 	 * access so that HWS firmware can save/restore entries.
2476 	 */
2477 	for (vmid = 1; vmid < 16; vmid++) {
2478 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2479 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2480 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2481 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2482 	}
2483 }
2484 
2485 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2486 {
2487 	u32 tmp;
2488 	int i;
2489 
2490 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2491 
2492 	gfx_v9_0_tiling_mode_table_init(adev);
2493 
2494 	gfx_v9_0_setup_rb(adev);
2495 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2496 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2497 
2498 	/* XXX SH_MEM regs */
2499 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2500 	mutex_lock(&adev->srbm_mutex);
2501 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2502 		soc15_grbm_select(adev, 0, 0, 0, i);
2503 		/* CP and shaders */
2504 		if (i == 0) {
2505 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2506 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2507 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2508 					    !!amdgpu_noretry);
2509 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2510 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2511 		} else {
2512 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2513 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2514 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2515 					    !!amdgpu_noretry);
2516 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2517 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2518 				(adev->gmc.private_aperture_start >> 48));
2519 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2520 				(adev->gmc.shared_aperture_start >> 48));
2521 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2522 		}
2523 	}
2524 	soc15_grbm_select(adev, 0, 0, 0, 0);
2525 
2526 	mutex_unlock(&adev->srbm_mutex);
2527 
2528 	gfx_v9_0_init_compute_vmid(adev);
2529 	gfx_v9_0_init_gds_vmid(adev);
2530 }
2531 
2532 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2533 {
2534 	u32 i, j, k;
2535 	u32 mask;
2536 
2537 	mutex_lock(&adev->grbm_idx_mutex);
2538 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2539 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2540 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2541 			for (k = 0; k < adev->usec_timeout; k++) {
2542 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2543 					break;
2544 				udelay(1);
2545 			}
2546 			if (k == adev->usec_timeout) {
2547 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2548 						      0xffffffff, 0xffffffff);
2549 				mutex_unlock(&adev->grbm_idx_mutex);
2550 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2551 					 i, j);
2552 				return;
2553 			}
2554 		}
2555 	}
2556 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2557 	mutex_unlock(&adev->grbm_idx_mutex);
2558 
2559 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2560 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2561 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2562 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2563 	for (k = 0; k < adev->usec_timeout; k++) {
2564 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2565 			break;
2566 		udelay(1);
2567 	}
2568 }
2569 
2570 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2571 					       bool enable)
2572 {
2573 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2574 
2575 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2576 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2577 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2578 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2579 
2580 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2581 }
2582 
2583 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2584 {
2585 	/* csib */
2586 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2587 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2588 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2589 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2590 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2591 			adev->gfx.rlc.clear_state_size);
2592 }
2593 
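/*
 * Walk the indirect portion of the RLC register-list-format blob: record
 * where each indirect block starts and collect the set of unique indirect
 * register addresses it references.
 */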
2594 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2595 				int indirect_offset,
2596 				int list_size,
2597 				int *unique_indirect_regs,
2598 				int unique_indirect_reg_count,
2599 				int *indirect_start_offsets,
2600 				int *indirect_start_offsets_count,
2601 				int max_start_offsets_count)
2602 {
2603 	int idx;
2604 
2605 	for (; indirect_offset < list_size; indirect_offset++) {
2606 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2607 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2608 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2609 
2610 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2611 			indirect_offset += 2;
2612 
2613 			/* look for the matching index */
2614 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2615 				if (unique_indirect_regs[idx] ==
2616 					register_list_format[indirect_offset] ||
2617 					!unique_indirect_regs[idx])
2618 					break;
2619 			}
2620 
2621 			BUG_ON(idx >= unique_indirect_reg_count);
2622 
2623 			if (!unique_indirect_regs[idx])
2624 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2625 
2626 			indirect_offset++;
2627 		}
2628 	}
2629 }
2630 
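/*
 * Program the RLC save/restore lists from the firmware tables: the
 * register restore list goes into SRM ARAM, the format and starting
 * offset tables into GPM scratch, and the unique indirect registers into
 * the RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 */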
2631 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2632 {
2633 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2634 	int unique_indirect_reg_count = 0;
2635 
2636 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2637 	int indirect_start_offsets_count = 0;
2638 
2639 	int list_size = 0;
2640 	int i = 0, j = 0;
2641 	u32 tmp = 0;
2642 
2643 	u32 *register_list_format =
2644 		kmemdup(adev->gfx.rlc.register_list_format,
2645 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2646 	if (!register_list_format)
2647 		return -ENOMEM;
2648 
2649 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2650 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2651 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2652 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2653 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2654 				    unique_indirect_regs,
2655 				    unique_indirect_reg_count,
2656 				    indirect_start_offsets,
2657 				    &indirect_start_offsets_count,
2658 				    ARRAY_SIZE(indirect_start_offsets));
2659 
2660 	/* enable auto inc in case it is disabled */
2661 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2662 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2663 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2664 
2665 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2666 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2667 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2668 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2669 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2670 			adev->gfx.rlc.register_restore[i]);
2671 
2672 	/* load indirect register */
2673 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2674 		adev->gfx.rlc.reg_list_format_start);
2675 
2676 	/* direct register portion */
2677 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2678 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2679 			register_list_format[i]);
2680 
2681 	/* indirect register portion */
2682 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2683 		if (register_list_format[i] == 0xFFFFFFFF) {
2684 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2685 			continue;
2686 		}
2687 
2688 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2689 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2690 
2691 		for (j = 0; j < unique_indirect_reg_count; j++) {
2692 			if (register_list_format[i] == unique_indirect_regs[j]) {
2693 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2694 				break;
2695 			}
2696 		}
2697 
2698 		BUG_ON(j >= unique_indirect_reg_count);
2699 
2700 		i++;
2701 	}
2702 
2703 	/* set save/restore list size */
2704 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2705 	list_size = list_size >> 1;
2706 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2707 		adev->gfx.rlc.reg_restore_list_size);
2708 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2709 
2710 	/* write the starting offsets to RLC scratch ram */
2711 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2712 		adev->gfx.rlc.starting_offsets_start);
2713 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2714 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2715 		       indirect_start_offsets[i]);
2716 
2717 	/* load unique indirect regs*/
2718 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2719 		if (unique_indirect_regs[i] != 0) {
2720 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2721 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2722 			       unique_indirect_regs[i] & 0x3FFFF);
2723 
2724 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2725 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2726 			       unique_indirect_regs[i] >> 20);
2727 		}
2728 	}
2729 
2730 	kfree(register_list_format);
2731 	return 0;
2732 }
2733 
2734 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2735 {
2736 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2737 }
2738 
2739 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2740 					     bool enable)
2741 {
2742 	uint32_t data = 0;
2743 	uint32_t default_data = 0;
2744 
2745 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2746 	if (enable) {
2747 		/* enable GFXIP control over CGPG */
2748 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2749 		if(default_data != data)
2750 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2751 
2752 		/* update status */
2753 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2754 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2755 		if(default_data != data)
2756 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2757 	} else {
2758 		/* restore GFXIP control over CGPG */
2759 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2760 		if(default_data != data)
2761 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2762 	}
2763 }
2764 
2765 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2766 {
2767 	uint32_t data = 0;
2768 
2769 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2770 			      AMD_PG_SUPPORT_GFX_SMG |
2771 			      AMD_PG_SUPPORT_GFX_DMG)) {
2772 		/* init IDLE_POLL_COUNT = 60 */
2773 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2774 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2775 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2776 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2777 
2778 		/* init RLC PG Delay */
2779 		data = 0;
2780 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2781 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2782 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2783 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2784 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2785 
2786 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2787 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2788 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2789 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2790 
2791 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2792 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2793 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2794 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2795 
2796 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2797 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2798 
2799 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2800 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2801 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2802 
2803 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2804 	}
2805 }
2806 
2807 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2808 						bool enable)
2809 {
2810 	uint32_t data = 0;
2811 	uint32_t default_data = 0;
2812 
2813 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2814 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2815 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2816 			     enable ? 1 : 0);
2817 	if (default_data != data)
2818 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2819 }
2820 
2821 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2822 						bool enable)
2823 {
2824 	uint32_t data = 0;
2825 	uint32_t default_data = 0;
2826 
2827 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2830 			     enable ? 1 : 0);
2831 	if (default_data != data)
2832 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834 
2835 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2836 					bool enable)
2837 {
2838 	uint32_t data = 0;
2839 	uint32_t default_data = 0;
2840 
2841 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2842 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2843 			     CP_PG_DISABLE,
2844 			     enable ? 0 : 1);
2845 	if (default_data != data)
2846 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2847 }
2848 
2849 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2850 						bool enable)
2851 {
2852 	uint32_t data, default_data;
2853 
2854 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2855 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2856 			     GFX_POWER_GATING_ENABLE,
2857 			     enable ? 1 : 0);
2858 	if (default_data != data)
2859 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2860 }
2861 
2862 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2863 						bool enable)
2864 {
2865 	uint32_t data, default_data;
2866 
2867 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2868 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2869 			     GFX_PIPELINE_PG_ENABLE,
2870 			     enable ? 1 : 0);
2871 	if (default_data != data)
2872 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2873 
2874 	if (!enable)
2875 		/* read any GFX register to wake up GFX */
2876 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2877 }
2878 
2879 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2880 						       bool enable)
2881 {
2882 	uint32_t data, default_data;
2883 
2884 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2885 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2886 			     STATIC_PER_CU_PG_ENABLE,
2887 			     enable ? 1 : 0);
2888 	if (default_data != data)
2889 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2890 }
2891 
2892 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2893 						bool enable)
2894 {
2895 	uint32_t data, default_data;
2896 
2897 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2898 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2899 			     DYN_PER_CU_PG_ENABLE,
2900 			     enable ? 1 : 0);
2901 	if (default_data != data)
2902 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2903 }
2904 
2905 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2906 {
2907 	gfx_v9_0_init_csb(adev);
2908 
2909 	/*
2910 	 * The RLC save/restore list is available since RLC v2_1
2911 	 * and is required by the gfxoff feature.
2912 	 */
2913 	if (adev->gfx.rlc.is_rlc_v2_1) {
2914 		gfx_v9_1_init_rlc_save_restore_list(adev);
2915 		gfx_v9_0_enable_save_restore_machine(adev);
2916 	}
2917 
2918 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2919 			      AMD_PG_SUPPORT_GFX_SMG |
2920 			      AMD_PG_SUPPORT_GFX_DMG |
2921 			      AMD_PG_SUPPORT_CP |
2922 			      AMD_PG_SUPPORT_GDS |
2923 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2924 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2925 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2926 		gfx_v9_0_init_gfx_power_gating(adev);
2927 	}
2928 }
2929 
2930 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2931 {
2932 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2933 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2934 	gfx_v9_0_wait_for_rlc_serdes(adev);
2935 }
2936 
2937 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2938 {
2939 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2940 	udelay(50);
2941 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2942 	udelay(50);
2943 }
2944 
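/*
 * Start the RLC F32 core. GUI idle interrupts are enabled here only for
 * dGPUs; with AMDGPU_RLC_DEBUG_RETRY defined, a debug RLC ucode (version
 * 0x108) additionally gets its timer interval and the minimum wptr/rptr
 * gap tuned through RLC_GPM_TIMER_INT_3 and RLC_GPM_GENERAL_12.
 */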
2945 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2946 {
2947 #ifdef AMDGPU_RLC_DEBUG_RETRY
2948 	u32 rlc_ucode_ver;
2949 #endif
2950 
2951 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2952 	udelay(50);
2953 
2954 	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
2955 	if (!(adev->flags & AMD_IS_APU)) {
2956 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2957 		udelay(50);
2958 	}
2959 
2960 #ifdef AMDGPU_RLC_DEBUG_RETRY
2961 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2962 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2963 	if (rlc_ucode_ver == 0x108) {
2964 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2965 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2966 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2967 		 * default is 0x9C4 to create a 100us interval */
2968 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2969 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2970 		 * to disable the page fault retry interrupts, default is
2971 		 * 0x100 (256) */
2972 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2973 	}
2974 #endif
2975 }
2976 
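/*
 * Legacy (non-PSP) RLC microcode load: the ucode image is streamed dword
 * by dword through RLC_GPM_UCODE_ADDR/DATA starting at
 * RLCG_UCODE_LOADING_START_ADDRESS, and the firmware version is written
 * back to the ADDR register when done.
 */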
2977 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2978 {
2979 	const struct rlc_firmware_header_v2_0 *hdr;
2980 	const __le32 *fw_data;
2981 	unsigned i, fw_size;
2982 
2983 	if (!adev->gfx.rlc_fw)
2984 		return -EINVAL;
2985 
2986 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2987 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2988 
2989 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2990 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2991 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2992 
2993 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2994 			RLCG_UCODE_LOADING_START_ADDRESS);
2995 	for (i = 0; i < fw_size; i++)
2996 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2997 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2998 
2999 	return 0;
3000 }
3001 
3002 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3003 {
3004 	int r;
3005 
3006 	if (amdgpu_sriov_vf(adev)) {
3007 		gfx_v9_0_init_csb(adev);
3008 		return 0;
3009 	}
3010 
3011 	adev->gfx.rlc.funcs->stop(adev);
3012 
3013 	/* disable CG */
3014 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3015 
3016 	gfx_v9_0_init_pg(adev);
3017 
3018 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3019 		/* legacy rlc firmware loading */
3020 		r = gfx_v9_0_rlc_load_microcode(adev);
3021 		if (r)
3022 			return r;
3023 	}
3024 
3025 	switch (adev->asic_type) {
3026 	case CHIP_RAVEN:
3027 	case CHIP_RENOIR:
3028 		if (amdgpu_lbpw == 0)
3029 			gfx_v9_0_enable_lbpw(adev, false);
3030 		else
3031 			gfx_v9_0_enable_lbpw(adev, true);
3032 		break;
3033 	case CHIP_VEGA20:
3034 		if (amdgpu_lbpw > 0)
3035 			gfx_v9_0_enable_lbpw(adev, true);
3036 		else
3037 			gfx_v9_0_enable_lbpw(adev, false);
3038 		break;
3039 	default:
3040 		break;
3041 	}
3042 
3043 	adev->gfx.rlc.funcs->start(adev);
3044 
3045 	return 0;
3046 }
3047 
3048 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3049 {
3050 	int i;
3051 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3052 
3053 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3054 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3055 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3056 	if (!enable) {
3057 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3058 			adev->gfx.gfx_ring[i].sched.ready = false;
3059 	}
3060 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3061 	udelay(50);
3062 }
3063 
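/*
 * Legacy (non-PSP) CP gfx microcode load: with the CP halted, the PFP, CE
 * and ME images are each streamed through their UCODE_ADDR/UCODE_DATA
 * (or ME_RAM_WADDR/ME_RAM_DATA) register pairs, then the firmware version
 * is written to the address register.
 */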
3064 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3065 {
3066 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3067 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3068 	const struct gfx_firmware_header_v1_0 *me_hdr;
3069 	const __le32 *fw_data;
3070 	unsigned i, fw_size;
3071 
3072 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3073 		return -EINVAL;
3074 
3075 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3076 		adev->gfx.pfp_fw->data;
3077 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3078 		adev->gfx.ce_fw->data;
3079 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3080 		adev->gfx.me_fw->data;
3081 
3082 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3083 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3084 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3085 
3086 	gfx_v9_0_cp_gfx_enable(adev, false);
3087 
3088 	/* PFP */
3089 	fw_data = (const __le32 *)
3090 		(adev->gfx.pfp_fw->data +
3091 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3092 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3093 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3094 	for (i = 0; i < fw_size; i++)
3095 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3096 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3097 
3098 	/* CE */
3099 	fw_data = (const __le32 *)
3100 		(adev->gfx.ce_fw->data +
3101 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3102 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3103 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3104 	for (i = 0; i < fw_size; i++)
3105 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3106 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3107 
3108 	/* ME */
3109 	fw_data = (const __le32 *)
3110 		(adev->gfx.me_fw->data +
3111 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3112 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3113 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3114 	for (i = 0; i < fw_size; i++)
3115 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3116 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3117 
3118 	return 0;
3119 }
3120 
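/*
 * Prime the gfx ring with the clear-state preamble: CONTEXT_CONTROL, the
 * SECT_CONTEXT register extents from gfx9_cs_data, CLEAR_STATE, the CE
 * partition bases and the VGT index type, all emitted as PM4 packets and
 * committed in a single submission.
 */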
3121 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3122 {
3123 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3124 	const struct cs_section_def *sect = NULL;
3125 	const struct cs_extent_def *ext = NULL;
3126 	int r, i, tmp;
3127 
3128 	/* init the CP */
3129 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3130 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3131 
3132 	gfx_v9_0_cp_gfx_enable(adev, true);
3133 
3134 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3135 	if (r) {
3136 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3137 		return r;
3138 	}
3139 
3140 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3141 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3142 
3143 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3144 	amdgpu_ring_write(ring, 0x80000000);
3145 	amdgpu_ring_write(ring, 0x80000000);
3146 
3147 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3148 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3149 			if (sect->id == SECT_CONTEXT) {
3150 				amdgpu_ring_write(ring,
3151 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3152 					       ext->reg_count));
3153 				amdgpu_ring_write(ring,
3154 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3155 				for (i = 0; i < ext->reg_count; i++)
3156 					amdgpu_ring_write(ring, ext->extent[i]);
3157 			}
3158 		}
3159 	}
3160 
3161 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3162 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3163 
3164 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3165 	amdgpu_ring_write(ring, 0);
3166 
3167 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3168 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3169 	amdgpu_ring_write(ring, 0x8000);
3170 	amdgpu_ring_write(ring, 0x8000);
3171 
3172 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3173 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3174 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3175 	amdgpu_ring_write(ring, tmp);
3176 	amdgpu_ring_write(ring, 0);
3177 
3178 	amdgpu_ring_commit(ring);
3179 
3180 	return 0;
3181 }
3182 
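/*
 * Bring up gfx ring 0: program the ring-buffer and block sizes in
 * CP_RB0_CNTL, reset the write pointer, point the CP at the rptr/wptr
 * write-back slots in the WB buffer, set the ring base address and, when
 * doorbells are used, the doorbell offset and range, then emit the
 * clear-state preamble via gfx_v9_0_cp_gfx_start().
 */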
3183 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3184 {
3185 	struct amdgpu_ring *ring;
3186 	u32 tmp;
3187 	u32 rb_bufsz;
3188 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3189 
3190 	/* Set the write pointer delay */
3191 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3192 
3193 	/* set the RB to use vmid 0 */
3194 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3195 
3196 	/* Set ring buffer size */
3197 	ring = &adev->gfx.gfx_ring[0];
3198 	rb_bufsz = order_base_2(ring->ring_size / 8);
3199 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3200 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3201 #ifdef __BIG_ENDIAN
3202 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3203 #endif
3204 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3205 
3206 	/* Initialize the ring buffer's write pointers */
3207 	ring->wptr = 0;
3208 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3209 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3210 
3211 	/* set the wb address whether it's enabled or not */
3212 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3213 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3214 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3215 
3216 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3217 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3218 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3219 
3220 	mdelay(1);
3221 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3222 
3223 	rb_addr = ring->gpu_addr >> 8;
3224 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3225 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3226 
3227 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3228 	if (ring->use_doorbell) {
3229 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3230 				    DOORBELL_OFFSET, ring->doorbell_index);
3231 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3232 				    DOORBELL_EN, 1);
3233 	} else {
3234 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3235 	}
3236 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3237 
3238 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3239 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3240 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3241 
3242 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3243 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3244 
3245 
3246 	/* start the ring */
3247 	gfx_v9_0_cp_gfx_start(adev);
3248 	ring->sched.ready = true;
3249 
3250 	return 0;
3251 }
3252 
3253 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3254 {
3255 	int i;
3256 
3257 	if (enable) {
3258 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3259 	} else {
3260 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3261 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3262 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3263 			adev->gfx.compute_ring[i].sched.ready = false;
3264 		adev->gfx.kiq.ring.sched.ready = false;
3265 	}
3266 	udelay(50);
3267 }
3268 
3269 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3270 {
3271 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3272 	const __le32 *fw_data;
3273 	unsigned i;
3274 	u32 tmp;
3275 
3276 	if (!adev->gfx.mec_fw)
3277 		return -EINVAL;
3278 
3279 	gfx_v9_0_cp_compute_enable(adev, false);
3280 
3281 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3282 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3283 
3284 	fw_data = (const __le32 *)
3285 		(adev->gfx.mec_fw->data +
3286 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3287 	tmp = 0;
3288 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3289 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3290 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3291 
3292 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3293 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3294 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3295 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3296 
3297 	/* MEC1 */
3298 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3299 			 mec_hdr->jt_offset);
3300 	for (i = 0; i < mec_hdr->jt_size; i++)
3301 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3302 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3303 
3304 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3305 			adev->gfx.mec_fw_version);
3306 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3307 
3308 	return 0;
3309 }
3310 
3311 /* KIQ functions */
3312 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3313 {
3314 	uint32_t tmp;
3315 	struct amdgpu_device *adev = ring->adev;
3316 
3317 	/* tell the RLC which queue is the KIQ */
3318 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3319 	tmp &= 0xffffff00;
3320 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3321 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3322 	tmp |= 0x80;
3323 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3324 }
3325 
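/*
 * Map the compute queues through the KIQ: build a bitmask of the MEC
 * queues reserved for the kernel, emit one SET_RESOURCES packet with that
 * mask, then one MAP_QUEUES packet per compute ring carrying its doorbell
 * offset, MQD address and wptr write-back address.
 */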
3326 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3327 {
3328 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3329 	uint64_t queue_mask = 0;
3330 	int r, i;
3331 
3332 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3333 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3334 			continue;
3335 
3336 		/* This situation may be hit in the future if a new HW
3337 		 * generation exposes more than 64 queues. If so, the
3338 		 * definition of queue_mask needs updating */
3339 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3340 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3341 			break;
3342 		}
3343 
3344 		queue_mask |= (1ull << i);
3345 	}
3346 
3347 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3348 	if (r) {
3349 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3350 		return r;
3351 	}
3352 
3353 	/* set resources */
3354 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3355 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3356 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3357 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3358 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3359 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3360 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3361 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3362 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3363 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3364 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3365 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3366 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3367 
3368 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3369 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3370 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3371 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3372 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3373 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3374 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3375 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3376 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3377 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3378 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3379 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3380 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3381 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3382 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3383 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3384 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3385 	}
3386 
3387 	r = amdgpu_ring_test_helper(kiq_ring);
3388 	if (r)
3389 		DRM_ERROR("KCQ enable failed\n");
3390 
3391 	return r;
3392 }
3393 
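/*
 * Fill the v9 MQD (memory queue descriptor) for a compute ring: EOP
 * buffer address and size, doorbell control, MQD and ring base addresses,
 * ring size, and the read/write-pointer write-back addresses. The HQD
 * registers are later loaded from this structure, either by the KIQ via
 * MAP_QUEUES or directly by the driver for the KIQ ring itself.
 */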
3394 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3395 {
3396 	struct amdgpu_device *adev = ring->adev;
3397 	struct v9_mqd *mqd = ring->mqd_ptr;
3398 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3399 	uint32_t tmp;
3400 
3401 	mqd->header = 0xC0310800;
3402 	mqd->compute_pipelinestat_enable = 0x00000001;
3403 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3404 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3405 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3406 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3407 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3408 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3409 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3410 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3411 	mqd->compute_misc_reserved = 0x00000003;
3412 
3413 	mqd->dynamic_cu_mask_addr_lo =
3414 		lower_32_bits(ring->mqd_gpu_addr
3415 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3416 	mqd->dynamic_cu_mask_addr_hi =
3417 		upper_32_bits(ring->mqd_gpu_addr
3418 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3419 
3420 	eop_base_addr = ring->eop_gpu_addr >> 8;
3421 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3422 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3423 
3424 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3425 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3426 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3427 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3428 
3429 	mqd->cp_hqd_eop_control = tmp;
3430 
3431 	/* enable doorbell? */
3432 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3433 
3434 	if (ring->use_doorbell) {
3435 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3436 				    DOORBELL_OFFSET, ring->doorbell_index);
3437 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438 				    DOORBELL_EN, 1);
3439 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3440 				    DOORBELL_SOURCE, 0);
3441 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3442 				    DOORBELL_HIT, 0);
3443 	} else {
3444 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3445 					 DOORBELL_EN, 0);
3446 	}
3447 
3448 	mqd->cp_hqd_pq_doorbell_control = tmp;
3449 
3450 	/* disable the queue if it's active */
3451 	ring->wptr = 0;
3452 	mqd->cp_hqd_dequeue_request = 0;
3453 	mqd->cp_hqd_pq_rptr = 0;
3454 	mqd->cp_hqd_pq_wptr_lo = 0;
3455 	mqd->cp_hqd_pq_wptr_hi = 0;
3456 
3457 	/* set the pointer to the MQD */
3458 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3459 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3460 
3461 	/* set MQD vmid to 0 */
3462 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3463 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3464 	mqd->cp_mqd_control = tmp;
3465 
3466 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3467 	hqd_gpu_addr = ring->gpu_addr >> 8;
3468 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3469 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3470 
3471 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3472 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3473 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3474 			    (order_base_2(ring->ring_size / 4) - 1));
3475 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3476 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3477 #ifdef __BIG_ENDIAN
3478 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3479 #endif
3480 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3481 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3482 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3483 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3484 	mqd->cp_hqd_pq_control = tmp;
3485 
3486 	/* set the wb address whether it's enabled or not */
3487 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3488 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3489 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3490 		upper_32_bits(wb_gpu_addr) & 0xffff;
3491 
3492 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3493 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3494 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3495 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3496 
3497 	tmp = 0;
3498 	/* enable the doorbell if requested */
3499 	if (ring->use_doorbell) {
3500 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3501 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502 				DOORBELL_OFFSET, ring->doorbell_index);
3503 
3504 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505 					 DOORBELL_EN, 1);
3506 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507 					 DOORBELL_SOURCE, 0);
3508 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509 					 DOORBELL_HIT, 0);
3510 	}
3511 
3512 	mqd->cp_hqd_pq_doorbell_control = tmp;
3513 
3514 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3515 	ring->wptr = 0;
3516 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3517 
3518 	/* set the vmid for the queue */
3519 	mqd->cp_hqd_vmid = 0;
3520 
3521 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3522 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3523 	mqd->cp_hqd_persistent_state = tmp;
3524 
3525 	/* set MIN_IB_AVAIL_SIZE */
3526 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3527 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3528 	mqd->cp_hqd_ib_control = tmp;
3529 
3530 	/* activate the queue */
3531 	mqd->cp_hqd_active = 1;
3532 
3533 	return 0;
3534 }
3535 
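/*
 * Program the HQD registers directly from the MQD contents. This path is
 * used for the KIQ ring, which is brought up by the driver rather than
 * mapped through the KIQ; it must be called with the target me/pipe/queue
 * selected via soc15_grbm_select() and with srbm_mutex held.
 */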
3536 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3537 {
3538 	struct amdgpu_device *adev = ring->adev;
3539 	struct v9_mqd *mqd = ring->mqd_ptr;
3540 	int j;
3541 
3542 	/* disable wptr polling */
3543 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3544 
3545 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3546 	       mqd->cp_hqd_eop_base_addr_lo);
3547 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3548 	       mqd->cp_hqd_eop_base_addr_hi);
3549 
3550 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3551 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3552 	       mqd->cp_hqd_eop_control);
3553 
3554 	/* enable doorbell? */
3555 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3556 	       mqd->cp_hqd_pq_doorbell_control);
3557 
3558 	/* disable the queue if it's active */
3559 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3560 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3561 		for (j = 0; j < adev->usec_timeout; j++) {
3562 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3563 				break;
3564 			udelay(1);
3565 		}
3566 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3567 		       mqd->cp_hqd_dequeue_request);
3568 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3569 		       mqd->cp_hqd_pq_rptr);
3570 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3571 		       mqd->cp_hqd_pq_wptr_lo);
3572 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3573 		       mqd->cp_hqd_pq_wptr_hi);
3574 	}
3575 
3576 	/* set the pointer to the MQD */
3577 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3578 	       mqd->cp_mqd_base_addr_lo);
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3580 	       mqd->cp_mqd_base_addr_hi);
3581 
3582 	/* set MQD vmid to 0 */
3583 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3584 	       mqd->cp_mqd_control);
3585 
3586 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3587 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3588 	       mqd->cp_hqd_pq_base_lo);
3589 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3590 	       mqd->cp_hqd_pq_base_hi);
3591 
3592 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3593 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3594 	       mqd->cp_hqd_pq_control);
3595 
3596 	/* set the wb address whether it's enabled or not */
3597 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3598 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3599 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3600 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3601 
3602 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3603 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3604 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3605 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3606 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3607 
3608 	/* enable the doorbell if requested */
3609 	if (ring->use_doorbell) {
3610 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3611 					(adev->doorbell_index.kiq * 2) << 2);
3612 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3613 					(adev->doorbell_index.userqueue_end * 2) << 2);
3614 	}
3615 
3616 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3617 	       mqd->cp_hqd_pq_doorbell_control);
3618 
3619 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3621 	       mqd->cp_hqd_pq_wptr_lo);
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3623 	       mqd->cp_hqd_pq_wptr_hi);
3624 
3625 	/* set the vmid for the queue */
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3627 
3628 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3629 	       mqd->cp_hqd_persistent_state);
3630 
3631 	/* activate the queue */
3632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3633 	       mqd->cp_hqd_active);
3634 
3635 	if (ring->use_doorbell)
3636 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3637 
3638 	return 0;
3639 }
3640 
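/*
 * Tear down a HQD: request a dequeue and wait for CP_HQD_ACTIVE to clear
 * (forcing it off if the request times out), then clear the queue state
 * registers.
 */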
3641 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3642 {
3643 	struct amdgpu_device *adev = ring->adev;
3644 	int j;
3645 
3646 	/* disable the queue if it's active */
3647 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3648 
3649 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3650 
3651 		for (j = 0; j < adev->usec_timeout; j++) {
3652 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3653 				break;
3654 			udelay(1);
3655 		}
3656 
3657 		if (j == adev->usec_timeout) {
3658 			DRM_DEBUG("KIQ dequeue request failed.\n");
3659 
3660 			/* Manual disable if dequeue request times out */
3661 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3662 		}
3663 
3664 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3665 		      0);
3666 	}
3667 
3668 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3670 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3672 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3674 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3675 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3676 
3677 	return 0;
3678 }
3679 
3680 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3681 {
3682 	struct amdgpu_device *adev = ring->adev;
3683 	struct v9_mqd *mqd = ring->mqd_ptr;
3684 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3685 
3686 	gfx_v9_0_kiq_setting(ring);
3687 
3688 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3689 		/* reset MQD to a clean status */
3690 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3691 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3692 
3693 		/* reset ring buffer */
3694 		ring->wptr = 0;
3695 		amdgpu_ring_clear_ring(ring);
3696 
3697 		mutex_lock(&adev->srbm_mutex);
3698 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3699 		gfx_v9_0_kiq_init_register(ring);
3700 		soc15_grbm_select(adev, 0, 0, 0, 0);
3701 		mutex_unlock(&adev->srbm_mutex);
3702 	} else {
3703 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3704 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3705 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3706 		mutex_lock(&adev->srbm_mutex);
3707 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3708 		gfx_v9_0_mqd_init(ring);
3709 		gfx_v9_0_kiq_init_register(ring);
3710 		soc15_grbm_select(adev, 0, 0, 0, 0);
3711 		mutex_unlock(&adev->srbm_mutex);
3712 
3713 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3714 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3715 	}
3716 
3717 	return 0;
3718 }
3719 
3720 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3721 {
3722 	struct amdgpu_device *adev = ring->adev;
3723 	struct v9_mqd *mqd = ring->mqd_ptr;
3724 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3725 
3726 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3727 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3728 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3729 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3730 		mutex_lock(&adev->srbm_mutex);
3731 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3732 		gfx_v9_0_mqd_init(ring);
3733 		soc15_grbm_select(adev, 0, 0, 0, 0);
3734 		mutex_unlock(&adev->srbm_mutex);
3735 
3736 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3737 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3738 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3739 		/* reset MQD to a clean status */
3740 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3741 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3742 
3743 		/* reset ring buffer */
3744 		ring->wptr = 0;
3745 		amdgpu_ring_clear_ring(ring);
3746 	} else {
3747 		amdgpu_ring_clear_ring(ring);
3748 	}
3749 
3750 	return 0;
3751 }
3752 
3753 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3754 {
3755 	struct amdgpu_ring *ring;
3756 	int r;
3757 
3758 	ring = &adev->gfx.kiq.ring;
3759 
3760 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3761 	if (unlikely(r != 0))
3762 		return r;
3763 
3764 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3765 	if (unlikely(r != 0))
3766 		return r;
3767 
3768 	gfx_v9_0_kiq_init_queue(ring);
3769 	amdgpu_bo_kunmap(ring->mqd_obj);
3770 	ring->mqd_ptr = NULL;
3771 	amdgpu_bo_unreserve(ring->mqd_obj);
3772 	ring->sched.ready = true;
3773 	return 0;
3774 }
3775 
3776 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3777 {
3778 	struct amdgpu_ring *ring = NULL;
3779 	int r = 0, i;
3780 
3781 	gfx_v9_0_cp_compute_enable(adev, true);
3782 
3783 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3784 		ring = &adev->gfx.compute_ring[i];
3785 
3786 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3787 		if (unlikely(r != 0))
3788 			goto done;
3789 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3790 		if (!r) {
3791 			r = gfx_v9_0_kcq_init_queue(ring);
3792 			amdgpu_bo_kunmap(ring->mqd_obj);
3793 			ring->mqd_ptr = NULL;
3794 		}
3795 		amdgpu_bo_unreserve(ring->mqd_obj);
3796 		if (r)
3797 			goto done;
3798 	}
3799 
3800 	r = gfx_v9_0_kiq_kcq_enable(adev);
3801 done:
3802 	return r;
3803 }
3804 
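/*
 * CP bring-up order: load the gfx and compute microcode when the PSP is
 * not doing front-door loading, resume the KIQ first (it is needed to map
 * the other compute queues), then the gfx ring, then the KCQs, and
 * finally ring-test everything before re-enabling GUI idle interrupts.
 */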
3805 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3806 {
3807 	int r, i;
3808 	struct amdgpu_ring *ring;
3809 
3810 	if (!(adev->flags & AMD_IS_APU))
3811 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3812 
3813 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3814 		if (adev->asic_type != CHIP_ARCTURUS) {
3815 			/* legacy firmware loading */
3816 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3817 			if (r)
3818 				return r;
3819 		}
3820 
3821 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3822 		if (r)
3823 			return r;
3824 	}
3825 
3826 	r = gfx_v9_0_kiq_resume(adev);
3827 	if (r)
3828 		return r;
3829 
3830 	if (adev->asic_type != CHIP_ARCTURUS) {
3831 		r = gfx_v9_0_cp_gfx_resume(adev);
3832 		if (r)
3833 			return r;
3834 	}
3835 
3836 	r = gfx_v9_0_kcq_resume(adev);
3837 	if (r)
3838 		return r;
3839 
3840 	if (adev->asic_type != CHIP_ARCTURUS) {
3841 		ring = &adev->gfx.gfx_ring[0];
3842 		r = amdgpu_ring_test_helper(ring);
3843 		if (r)
3844 			return r;
3845 	}
3846 
3847 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3848 		ring = &adev->gfx.compute_ring[i];
3849 		amdgpu_ring_test_helper(ring);
3850 	}
3851 
3852 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3853 
3854 	return 0;
3855 }
3856 
3857 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3858 {
3859 	if (adev->asic_type != CHIP_ARCTURUS)
3860 		gfx_v9_0_cp_gfx_enable(adev, enable);
3861 	gfx_v9_0_cp_compute_enable(adev, enable);
3862 }
3863 
3864 static int gfx_v9_0_hw_init(void *handle)
3865 {
3866 	int r;
3867 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868 
3869 	if (!amdgpu_sriov_vf(adev))
3870 		gfx_v9_0_init_golden_registers(adev);
3871 
3872 	gfx_v9_0_constants_init(adev);
3873 
3874 	r = gfx_v9_0_csb_vram_pin(adev);
3875 	if (r)
3876 		return r;
3877 
3878 	r = adev->gfx.rlc.funcs->resume(adev);
3879 	if (r)
3880 		return r;
3881 
3882 	r = gfx_v9_0_cp_resume(adev);
3883 	if (r)
3884 		return r;
3885 
3886 	if (adev->asic_type != CHIP_ARCTURUS) {
3887 		r = gfx_v9_0_ngg_en(adev);
3888 		if (r)
3889 			return r;
3890 	}
3891 
3892 	return r;
3893 }
3894 
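/*
 * Unmap the compute queues through the KIQ: one UNMAP_QUEUES packet per
 * compute ring with the RESET_QUEUES action, addressed by doorbell offset.
 */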
3895 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3896 {
3897 	int r, i;
3898 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3899 
3900 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3901 	if (r)
3902 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3903 
3904 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3905 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3906 
3907 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3908 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3909 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3910 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3911 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3912 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3913 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3914 		amdgpu_ring_write(kiq_ring, 0);
3915 		amdgpu_ring_write(kiq_ring, 0);
3916 		amdgpu_ring_write(kiq_ring, 0);
3917 	}
3918 	r = amdgpu_ring_test_helper(kiq_ring);
3919 	if (r)
3920 		DRM_ERROR("KCQ disable failed\n");
3921 
3922 	return r;
3923 }
3924 
3925 static int gfx_v9_0_hw_fini(void *handle)
3926 {
3927 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3928 
3929 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3930 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3931 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3932 
3933 	/* disable the KCQs so the CPC does not touch memory that is no longer valid */
3934 	gfx_v9_0_kcq_disable(adev);
3935 
3936 	if (amdgpu_sriov_vf(adev)) {
3937 		gfx_v9_0_cp_gfx_enable(adev, false);
3938 		/* Polling must be disabled for SRIOV when hw_fini completes;
3939 		 * otherwise the CPC engine may keep fetching a WB address that
3940 		 * is no longer valid after sw_fini and trigger DMAR read errors
3941 		 * on the hypervisor side.
3942 		 */
3943 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3944 		return 0;
3945 	}
3946 
3947 	/* Use the deinitialize sequence from CAIL when unbinding the device
3948 	 * from the driver; otherwise the KIQ hangs when the device is bound back.
3949 	 */
3950 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3951 		mutex_lock(&adev->srbm_mutex);
3952 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3953 				adev->gfx.kiq.ring.pipe,
3954 				adev->gfx.kiq.ring.queue, 0);
3955 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3956 		soc15_grbm_select(adev, 0, 0, 0, 0);
3957 		mutex_unlock(&adev->srbm_mutex);
3958 	}
3959 
3960 	gfx_v9_0_cp_enable(adev, false);
3961 	adev->gfx.rlc.funcs->stop(adev);
3962 
3963 	gfx_v9_0_csb_vram_unpin(adev);
3964 
3965 	return 0;
3966 }
3967 
3968 static int gfx_v9_0_suspend(void *handle)
3969 {
3970 	return gfx_v9_0_hw_fini(handle);
3971 }
3972 
3973 static int gfx_v9_0_resume(void *handle)
3974 {
3975 	return gfx_v9_0_hw_init(handle);
3976 }
3977 
3978 static bool gfx_v9_0_is_idle(void *handle)
3979 {
3980 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3981 
3982 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3983 				GRBM_STATUS, GUI_ACTIVE))
3984 		return false;
3985 	else
3986 		return true;
3987 }
3988 
3989 static int gfx_v9_0_wait_for_idle(void *handle)
3990 {
3991 	unsigned i;
3992 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3993 
3994 	for (i = 0; i < adev->usec_timeout; i++) {
3995 		if (gfx_v9_0_is_idle(handle))
3996 			return 0;
3997 		udelay(1);
3998 	}
3999 	return -ETIMEDOUT;
4000 }
4001 
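/*
 * Decide which soft-reset bits are needed from the busy flags in
 * GRBM_STATUS/GRBM_STATUS2 (CP, GFX, RLC), halt the RLC and the CP
 * front-ends, then pulse the selected bits in GRBM_SOFT_RESET.
 */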
4002 static int gfx_v9_0_soft_reset(void *handle)
4003 {
4004 	u32 grbm_soft_reset = 0;
4005 	u32 tmp;
4006 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4007 
4008 	/* GRBM_STATUS */
4009 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4010 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4011 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4012 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4013 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4014 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4015 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4016 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4017 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4018 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4019 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4020 	}
4021 
4022 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4023 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4024 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4025 	}
4026 
4027 	/* GRBM_STATUS2 */
4028 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4029 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4030 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4031 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4032 
4033 
4034 	if (grbm_soft_reset) {
4035 		/* stop the rlc */
4036 		adev->gfx.rlc.funcs->stop(adev);
4037 
4038 		if (adev->asic_type != CHIP_ARCTURUS)
4039 			/* Disable GFX parsing/prefetching */
4040 			gfx_v9_0_cp_gfx_enable(adev, false);
4041 
4042 		/* Disable MEC parsing/prefetching */
4043 		gfx_v9_0_cp_compute_enable(adev, false);
4044 
4045 		if (grbm_soft_reset) {
4046 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4047 			tmp |= grbm_soft_reset;
4048 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4049 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4050 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4051 
4052 			udelay(50);
4053 
4054 			tmp &= ~grbm_soft_reset;
4055 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4056 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4057 		}
4058 
4059 		/* Wait a little for things to settle down */
4060 		udelay(50);
4061 	}
4062 	return 0;
4063 }
4064 
4065 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4066 {
4067 	uint64_t clock;
4068 
4069 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4070 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4071 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4072 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4073 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4074 	return clock;
4075 }
4076 
4077 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4078 					  uint32_t vmid,
4079 					  uint32_t gds_base, uint32_t gds_size,
4080 					  uint32_t gws_base, uint32_t gws_size,
4081 					  uint32_t oa_base, uint32_t oa_size)
4082 {
4083 	struct amdgpu_device *adev = ring->adev;
4084 
4085 	/* GDS Base */
4086 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4087 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4088 				   gds_base);
4089 
4090 	/* GDS Size */
4091 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4092 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4093 				   gds_size);
4094 
4095 	/* GWS */
4096 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4097 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4098 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4099 
4100 	/* OA */
4101 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4102 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4103 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4104 }
4105 
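/*
 * Raw GFX9 shader machine code for the EDC/ECC GPR work-around below:
 * small compute kernels that touch the VGPR and SGPR files as part of the
 * ECC initialization work-around.
 */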
4106 static const u32 vgpr_init_compute_shader[] =
4107 {
4108 	0xb07c0000, 0xbe8000ff,
4109 	0x000000f8, 0xbf110800,
4110 	0x7e000280, 0x7e020280,
4111 	0x7e040280, 0x7e060280,
4112 	0x7e080280, 0x7e0a0280,
4113 	0x7e0c0280, 0x7e0e0280,
4114 	0x80808800, 0xbe803200,
4115 	0xbf84fff5, 0xbf9c0000,
4116 	0xd28c0001, 0x0001007f,
4117 	0xd28d0001, 0x0002027e,
4118 	0x10020288, 0xb8810904,
4119 	0xb7814000, 0xd1196a01,
4120 	0x00000301, 0xbe800087,
4121 	0xbefc00c1, 0xd89c4000,
4122 	0x00020201, 0xd89cc080,
4123 	0x00040401, 0x320202ff,
4124 	0x00000800, 0x80808100,
4125 	0xbf84fff8, 0x7e020280,
4126 	0xbf810000, 0x00000000,
4127 };
4128 
4129 static const u32 sgpr_init_compute_shader[] =
4130 {
4131 	0xb07c0000, 0xbe8000ff,
4132 	0x0000005f, 0xbee50080,
4133 	0xbe812c65, 0xbe822c65,
4134 	0xbe832c65, 0xbe842c65,
4135 	0xbe852c65, 0xb77c0005,
4136 	0x80808500, 0xbf84fff8,
4137 	0xbe800080, 0xbf810000,
4138 };
4139 
4140 static const struct soc15_reg_entry vgpr_init_regs[] = {
4141    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4151 };
4152 
4153 static const struct soc15_reg_entry sgpr_init_regs[] = {
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4164 };
4165 
4166 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4167    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4168    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4169    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4173    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4174    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4175    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4176    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4177    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4178    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4179    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4180    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4181    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4183    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4185    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4186    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4187    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4188    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4189    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4194    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4195    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4196    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4198    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4199 };
4200 
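/*
 * GDS ECC work-around: temporarily expose the whole GDS as VMID0, issue a
 * DMA_DATA packet that writes across it, poll until the ring drains, then
 * shrink GDS_VMID0_SIZE back to zero.
 */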
4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4202 {
4203 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4204 	int i, r;
4205 
4206 	r = amdgpu_ring_alloc(ring, 7);
4207 	if (r) {
4208 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4209 			ring->name, r);
4210 		return r;
4211 	}
4212 
4213 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4214 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4215 
4216 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4217 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4218 				PACKET3_DMA_DATA_DST_SEL(1) |
4219 				PACKET3_DMA_DATA_SRC_SEL(2) |
4220 				PACKET3_DMA_DATA_ENGINE(0)));
4221 	amdgpu_ring_write(ring, 0);
4222 	amdgpu_ring_write(ring, 0);
4223 	amdgpu_ring_write(ring, 0);
4224 	amdgpu_ring_write(ring, 0);
4225 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4226 				adev->gds.gds_size);
4227 
4228 	amdgpu_ring_commit(ring);
4229 
4230 	for (i = 0; i < adev->usec_timeout; i++) {
4231 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4232 			break;
4233 		udelay(1);
4234 	}
4235 
4236 	if (i >= adev->usec_timeout)
4237 		r = -ETIMEDOUT;
4238 
4239 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4240 
4241 	return r;
4242 }
4243 
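/*
 * GPR ECC work-around: build one indirect buffer that uploads the two
 * init shaders, programs the dispatch state from vgpr_init_regs and
 * sgpr_init_regs, dispatches each shader once and ends each pass with a
 * CS partial flush event; afterwards the SEC/DED counter registers are
 * read back per-SE/per-instance to clear them.
 */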
4244 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4245 {
4246 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4247 	struct amdgpu_ib ib;
4248 	struct dma_fence *f = NULL;
4249 	int r, i, j, k;
4250 	unsigned total_size, vgpr_offset, sgpr_offset;
4251 	u64 gpu_addr;
4252 
4253 	/* only supported when RAS is enabled */
4254 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4255 		return 0;
4256 
4257 	/* bail if the compute ring is not ready */
4258 	if (!ring->sched.ready)
4259 		return 0;
4260 
4261 	total_size =
4262 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4263 	total_size +=
4264 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4265 	total_size = ALIGN(total_size, 256);
4266 	vgpr_offset = total_size;
4267 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4268 	sgpr_offset = total_size;
4269 	total_size += sizeof(sgpr_init_compute_shader);
4270 
4271 	/* allocate an indirect buffer to put the commands in */
4272 	memset(&ib, 0, sizeof(ib));
4273 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4274 	if (r) {
4275 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4276 		return r;
4277 	}
4278 
4279 	/* load the compute shaders */
4280 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4281 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4282 
4283 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4284 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4285 
4286 	/* init the ib length to 0 */
4287 	ib.length_dw = 0;
4288 
4289 	/* VGPR */
4290 	/* write the register state for the compute dispatch */
4291 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4292 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4293 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4294 								- PACKET3_SET_SH_REG_START;
4295 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4296 	}
4297 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4298 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4299 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4300 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4301 							- PACKET3_SET_SH_REG_START;
4302 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4303 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4304 
4305 	/* write dispatch packet */
4306 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4307 	ib.ptr[ib.length_dw++] = 128; /* x */
4308 	ib.ptr[ib.length_dw++] = 1; /* y */
4309 	ib.ptr[ib.length_dw++] = 1; /* z */
4310 	ib.ptr[ib.length_dw++] =
4311 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4312 
4313 	/* write CS partial flush packet */
4314 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4315 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4316 
4317 	/* SGPR */
4318 	/* write the register state for the compute dispatch */
4319 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4320 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4321 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4322 								- PACKET3_SET_SH_REG_START;
4323 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4324 	}
4325 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4326 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4327 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4328 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4329 							- PACKET3_SET_SH_REG_START;
4330 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4331 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4332 
4333 	/* write dispatch packet */
4334 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4335 	ib.ptr[ib.length_dw++] = 128; /* x */
4336 	ib.ptr[ib.length_dw++] = 1; /* y */
4337 	ib.ptr[ib.length_dw++] = 1; /* z */
4338 	ib.ptr[ib.length_dw++] =
4339 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4340 
4341 	/* write CS partial flush packet */
4342 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4343 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4344 
4345 	/* schedule the ib on the ring */
4346 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4347 	if (r) {
4348 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4349 		goto fail;
4350 	}
4351 
4352 	/* wait for the GPU to finish processing the IB */
4353 	r = dma_fence_wait(f, false);
4354 	if (r) {
4355 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4356 		goto fail;
4357 	}
4358 
4359 	/* read back registers to clear the counters */
4360 	mutex_lock(&adev->grbm_idx_mutex);
4361 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4362 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4363 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4364 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4365 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4366 			}
4367 		}
4368 	}
4369 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4370 	mutex_unlock(&adev->grbm_idx_mutex);
4371 
4372 fail:
4373 	amdgpu_ib_free(adev, &ib, NULL);
4374 	dma_fence_put(f);
4375 
4376 	return r;
4377 }
4378 
4379 static int gfx_v9_0_early_init(void *handle)
4380 {
4381 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4382 
4383 	if (adev->asic_type == CHIP_ARCTURUS)
4384 		adev->gfx.num_gfx_rings = 0;
4385 	else
4386 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4387 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4388 	gfx_v9_0_set_ring_funcs(adev);
4389 	gfx_v9_0_set_irq_funcs(adev);
4390 	gfx_v9_0_set_gds_init(adev);
4391 	gfx_v9_0_set_rlc_funcs(adev);
4392 
4393 	return 0;
4394 }
4395 
4396 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4397 		struct ras_err_data *err_data,
4398 		struct amdgpu_iv_entry *entry);
4399 
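/*
 * Late ECC/RAS init for the GFX block: run the GDS and GPR EDC
 * workarounds (the latter needs IBs, hence late init), enable the GFX
 * RAS feature, and register the RAS interrupt handler plus the
 * sysfs/debugfs error nodes. On resume, *ras_if already exists and only
 * the feature enable and the CP ECC interrupt are re-armed.
 */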
4400 static int gfx_v9_0_ecc_late_init(void *handle)
4401 {
4402 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4403 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4404 	struct ras_ih_if ih_info = {
4405 		.cb = gfx_v9_0_process_ras_data_cb,
4406 	};
4407 	struct ras_fs_if fs_info = {
4408 		.sysfs_name = "gfx_err_count",
4409 		.debugfs_name = "gfx_err_inject",
4410 	};
4411 	struct ras_common_if ras_block = {
4412 		.block = AMDGPU_RAS_BLOCK__GFX,
4413 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4414 		.sub_block_index = 0,
4415 		.name = "gfx",
4416 	};
4417 	int r;
4418 
4419 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4420 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4421 		return 0;
4422 	}
4423 
4424 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4425 	if (r)
4426 		return r;
4427 
4428 	/* requires IBs so do in late init after IB pool is initialized */
4429 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4430 	if (r)
4431 		return r;
4432 
4433 	/* handle resume path. */
4434 	if (*ras_if) {
4435 		/* resend ras TA enable cmd during resume.
4436 		 * prepare to handle failure.
4437 		 */
4438 		ih_info.head = **ras_if;
4439 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4440 		if (r) {
4441 			if (r == -EAGAIN) {
4442 				/* request a gpu reset. will run again. */
4443 				amdgpu_ras_request_reset_on_boot(adev,
4444 						AMDGPU_RAS_BLOCK__GFX);
4445 				return 0;
4446 			}
4447 			/* failed to enable RAS, clean up everything */
4448 			goto irq;
4449 		}
4450 		/* enable successfully. continue. */
4451 		goto resume;
4452 	}
4453 
4454 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4455 	if (!*ras_if)
4456 		return -ENOMEM;
4457 
4458 	**ras_if = ras_block;
4459 
4460 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4461 	if (r) {
4462 		if (r == -EAGAIN) {
4463 			amdgpu_ras_request_reset_on_boot(adev,
4464 					AMDGPU_RAS_BLOCK__GFX);
4465 			r = 0;
4466 		}
4467 		goto feature;
4468 	}
4469 
4470 	ih_info.head = **ras_if;
4471 	fs_info.head = **ras_if;
4472 
4473 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4474 	if (r)
4475 		goto interrupt;
4476 
4477 	amdgpu_ras_debugfs_create(adev, &fs_info);
4478 
4479 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4480 	if (r)
4481 		goto sysfs;
4482 resume:
4483 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4484 	if (r)
4485 		goto irq;
4486 
4487 	return 0;
4488 irq:
4489 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4490 sysfs:
4491 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4492 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4493 interrupt:
4494 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4495 feature:
4496 	kfree(*ras_if);
4497 	*ras_if = NULL;
4498 	return r;
4499 }
4500 
4501 static int gfx_v9_0_late_init(void *handle)
4502 {
4503 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4504 	int r;
4505 
4506 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4507 	if (r)
4508 		return r;
4509 
4510 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4511 	if (r)
4512 		return r;
4513 
4514 	r = gfx_v9_0_ecc_late_init(handle);
4515 	if (r)
4516 		return r;
4517 
4518 	return 0;
4519 }
4520 
4521 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4522 {
4523 	uint32_t rlc_setting;
4524 
4525 	/* if RLC is not enabled, do nothing */
4526 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4527 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4528 		return false;
4529 
4530 	return true;
4531 }
4532 
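/*
 * Ask the RLC firmware to enter safe mode before the driver touches
 * CG/PG related registers: write the request to mmRLC_SAFE_MODE and
 * poll until the RLC clears the CMD bit to acknowledge.
 */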
4533 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4534 {
4535 	uint32_t data;
4536 	unsigned i;
4537 
4538 	data = RLC_SAFE_MODE__CMD_MASK;
4539 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4540 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4541 
4542 	/* wait for RLC_SAFE_MODE */
4543 	for (i = 0; i < adev->usec_timeout; i++) {
4544 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4545 			break;
4546 		udelay(1);
4547 	}
4548 }
4549 
4550 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4551 {
4552 	uint32_t data;
4553 
4554 	data = RLC_SAFE_MODE__CMD_MASK;
4555 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4556 }
4557 
4558 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4559 						bool enable)
4560 {
4561 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4562 
4563 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4564 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4565 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4566 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4567 	} else {
4568 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4569 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4570 	}
4571 
4572 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4573 }
4574 
4575 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4576 						bool enable)
4577 {
4578 	/* TODO: double check if we need to perform under safe mode */
4579 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4580 
4581 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4582 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4583 	else
4584 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4585 
4586 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4587 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4588 	else
4589 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4590 
4591 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4592 }
4593 
4594 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4595 						      bool enable)
4596 {
4597 	uint32_t data, def;
4598 
4599 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4600 
4601 	/* It is disabled by HW by default */
4602 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4603 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4604 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4605 
4606 		if (adev->asic_type != CHIP_VEGA12)
4607 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4608 
4609 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4610 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4611 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4612 
4613 		/* only for Vega10 & Raven1 */
4614 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4615 
4616 		if (def != data)
4617 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4618 
4619 		/* MGLS is a global flag to control all MGLS in GFX */
4620 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4621 			/* 2 - RLC memory Light sleep */
4622 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4623 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4624 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4625 				if (def != data)
4626 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4627 			}
4628 			/* 3 - CP memory Light sleep */
4629 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4630 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4631 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4632 				if (def != data)
4633 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4634 			}
4635 		}
4636 	} else {
4637 		/* 1 - MGCG_OVERRIDE */
4638 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4639 
4640 		if (adev->asic_type != CHIP_VEGA12)
4641 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4642 
4643 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4644 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4645 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4646 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4647 
4648 		if (def != data)
4649 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4650 
4651 		/* 2 - disable MGLS in RLC */
4652 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4653 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4654 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4655 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4656 		}
4657 
4658 		/* 3 - disable MGLS in CP */
4659 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4660 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4661 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4662 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4663 		}
4664 	}
4665 
4666 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4667 }
4668 
4669 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4670 					   bool enable)
4671 {
4672 	uint32_t data, def;
4673 
4674 	if (adev->asic_type == CHIP_ARCTURUS)
4675 		return;
4676 
4677 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4678 
4679 	/* Enable 3D CGCG/CGLS */
4680 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4681 		/* write cmd to clear cgcg/cgls ov */
4682 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4683 		/* unset CGCG override */
4684 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4685 		/* update CGCG and CGLS override bits */
4686 		if (def != data)
4687 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4688 
4689 		/* enable 3D CGCG FSM (0x0000363f) */
4690 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4691 
4692 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4693 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4694 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4695 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4696 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4697 		if (def != data)
4698 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4699 
4700 		/* set IDLE_POLL_COUNT(0x00900100) */
4701 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4702 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4703 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4704 		if (def != data)
4705 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4706 	} else {
4707 		/* Disable CGCG/CGLS */
4708 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4709 		/* disable cgcg, cgls should be disabled */
4710 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4711 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4712 		/* disable cgcg and cgls in FSM */
4713 		if (def != data)
4714 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4715 	}
4716 
4717 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4718 }
4719 
4720 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4721 						      bool enable)
4722 {
4723 	uint32_t def, data;
4724 
4725 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4726 
4727 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4728 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4729 		/* unset CGCG override */
4730 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4731 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4732 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4733 		else
4734 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4735 		/* update CGCG and CGLS override bits */
4736 		if (def != data)
4737 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4738 
4739 		/* enable cgcg FSM(0x0000363F) */
4740 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4741 
4742 		if (adev->asic_type == CHIP_ARCTURUS)
4743 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4744 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4745 		else
4746 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4747 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4748 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4749 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4750 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4751 		if (def != data)
4752 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4753 
4754 		/* set IDLE_POLL_COUNT(0x00900100) */
4755 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4756 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4757 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4758 		if (def != data)
4759 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4760 	} else {
4761 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4762 		/* reset CGCG/CGLS bits */
4763 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4764 		/* disable cgcg and cgls in FSM */
4765 		if (def != data)
4766 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4767 	}
4768 
4769 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4770 }
4771 
4772 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4773 					    bool enable)
4774 {
4775 	if (enable) {
4776 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4777 		 * ===  MGCG + MGLS ===
4778 		 */
4779 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4780 		/* ===  CGCG /CGLS for GFX 3D Only === */
4781 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4782 		/* ===  CGCG + CGLS === */
4783 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4784 	} else {
4785 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4786 		 * ===  CGCG + CGLS ===
4787 		 */
4788 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4789 		/* ===  CGCG /CGLS for GFX 3D Only === */
4790 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4791 		/* ===  MGCG + MGLS === */
4792 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4793 	}
4794 	return 0;
4795 }
4796 
4797 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4798 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4799 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4800 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4801 	.init = gfx_v9_0_rlc_init,
4802 	.get_csb_size = gfx_v9_0_get_csb_size,
4803 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4804 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4805 	.resume = gfx_v9_0_rlc_resume,
4806 	.stop = gfx_v9_0_rlc_stop,
4807 	.reset = gfx_v9_0_rlc_reset,
4808 	.start = gfx_v9_0_rlc_start
4809 };
4810 
4811 static int gfx_v9_0_set_powergating_state(void *handle,
4812 					  enum amd_powergating_state state)
4813 {
4814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4815 	bool enable = (state == AMD_PG_STATE_GATE);
4816 
4817 	switch (adev->asic_type) {
4818 	case CHIP_RAVEN:
4819 	case CHIP_RENOIR:
4820 		if (!enable) {
4821 			amdgpu_gfx_off_ctrl(adev, false);
4822 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4823 		}
4824 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4825 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4826 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4827 		} else {
4828 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4829 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4830 		}
4831 
4832 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4833 			gfx_v9_0_enable_cp_power_gating(adev, true);
4834 		else
4835 			gfx_v9_0_enable_cp_power_gating(adev, false);
4836 
4837 		/* update gfx cgpg state */
4838 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4839 
4840 		/* update mgcg state */
4841 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4842 
4843 		if (enable)
4844 			amdgpu_gfx_off_ctrl(adev, true);
4845 		break;
4846 	case CHIP_VEGA12:
4847 		if (!enable) {
4848 			amdgpu_gfx_off_ctrl(adev, false);
4849 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4850 		} else {
4851 			amdgpu_gfx_off_ctrl(adev, true);
4852 		}
4853 		break;
4854 	default:
4855 		break;
4856 	}
4857 
4858 	return 0;
4859 }
4860 
4861 static int gfx_v9_0_set_clockgating_state(void *handle,
4862 					  enum amd_clockgating_state state)
4863 {
4864 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4865 
4866 	if (amdgpu_sriov_vf(adev))
4867 		return 0;
4868 
4869 	switch (adev->asic_type) {
4870 	case CHIP_VEGA10:
4871 	case CHIP_VEGA12:
4872 	case CHIP_VEGA20:
4873 	case CHIP_RAVEN:
4874 	case CHIP_ARCTURUS:
4875 	case CHIP_RENOIR:
4876 		gfx_v9_0_update_gfx_clock_gating(adev,
4877 						 state == AMD_CG_STATE_GATE);
4878 		break;
4879 	default:
4880 		break;
4881 	}
4882 	return 0;
4883 }
4884 
4885 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4886 {
4887 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888 	int data;
4889 
4890 	if (amdgpu_sriov_vf(adev))
4891 		*flags = 0;
4892 
4893 	/* AMD_CG_SUPPORT_GFX_MGCG */
4894 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4895 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4896 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4897 
4898 	/* AMD_CG_SUPPORT_GFX_CGCG */
4899 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4900 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4901 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4902 
4903 	/* AMD_CG_SUPPORT_GFX_CGLS */
4904 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4905 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4906 
4907 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4908 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4909 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4910 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4911 
4912 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4913 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4914 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4915 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4916 
4917 	if (adev->asic_type != CHIP_ARCTURUS) {
4918 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4919 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4920 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4921 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4922 
4923 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4924 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4925 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4926 	}
4927 }
4928 
4929 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4930 {
4931 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4932 }
4933 
4934 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4935 {
4936 	struct amdgpu_device *adev = ring->adev;
4937 	u64 wptr;
4938 
4939 	/* XXX check if swapping is necessary on BE */
4940 	if (ring->use_doorbell) {
4941 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4942 	} else {
4943 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4944 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4945 	}
4946 
4947 	return wptr;
4948 }
4949 
4950 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4951 {
4952 	struct amdgpu_device *adev = ring->adev;
4953 
4954 	if (ring->use_doorbell) {
4955 		/* XXX check if swapping is necessary on BE */
4956 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4957 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4958 	} else {
4959 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4960 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4961 	}
4962 }
4963 
4964 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
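/*
 * Emit an HDP flush: a WAIT_REG_MEM that writes the NBIO HDP flush
 * request register and polls the done register with a per-ring
 * ref/mask, so CPU writes through the BAR become visible to the GPU.
 */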
4965 {
4966 	struct amdgpu_device *adev = ring->adev;
4967 	u32 ref_and_mask, reg_mem_engine;
4968 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4969 
4970 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4971 		switch (ring->me) {
4972 		case 1:
4973 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4974 			break;
4975 		case 2:
4976 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4977 			break;
4978 		default:
4979 			return;
4980 		}
4981 		reg_mem_engine = 0;
4982 	} else {
4983 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4984 		reg_mem_engine = 1; /* pfp */
4985 	}
4986 
4987 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4988 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4989 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4990 			      ref_and_mask, ref_and_mask, 0x20);
4991 }
4992 
4993 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4994 					struct amdgpu_job *job,
4995 					struct amdgpu_ib *ib,
4996 					uint32_t flags)
4997 {
4998 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4999 	u32 header, control = 0;
5000 
5001 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5002 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5003 	else
5004 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5005 
5006 	control |= ib->length_dw | (vmid << 24);
5007 
5008 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5009 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5010 
5011 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5012 			gfx_v9_0_ring_emit_de_meta(ring);
5013 	}
5014 
5015 	amdgpu_ring_write(ring, header);
5016 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5017 	amdgpu_ring_write(ring,
5018 #ifdef __BIG_ENDIAN
5019 		(2 << 0) |
5020 #endif
5021 		lower_32_bits(ib->gpu_addr));
5022 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5023 	amdgpu_ring_write(ring, control);
5024 }
5025 
5026 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5027 					  struct amdgpu_job *job,
5028 					  struct amdgpu_ib *ib,
5029 					  uint32_t flags)
5030 {
5031 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5032 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5033 
5034 	/* Currently, there is a high possibility to get wave ID mismatch
5035 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5036 	 * different wave IDs than the GDS expects. This situation happens
5037 	 * randomly when at least 5 compute pipes use GDS ordered append.
5038 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5039 	 * Those are probably bugs somewhere else in the kernel driver.
5040 	 *
5041 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5042 	 * GDS to 0 for this ring (me/pipe).
5043 	 */
5044 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5045 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5046 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5047 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5048 	}
5049 
5050 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5051 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5052 	amdgpu_ring_write(ring,
5053 #ifdef __BIG_ENDIAN
5054 				(2 << 0) |
5055 #endif
5056 				lower_32_bits(ib->gpu_addr));
5057 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5058 	amdgpu_ring_write(ring, control);
5059 }
5060 
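/*
 * Emit a fence with a RELEASE_MEM packet: flush/writeback the TC caches
 * (or TC writeback only when TC_WB_ONLY is requested), write the 32- or
 * 64-bit sequence value to @addr and optionally raise an interrupt.
 */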
5061 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5062 				     u64 seq, unsigned flags)
5063 {
5064 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5065 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5066 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5067 
5068 	/* RELEASE_MEM - flush caches, send int */
5069 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5070 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5071 					       EOP_TC_NC_ACTION_EN) :
5072 					      (EOP_TCL1_ACTION_EN |
5073 					       EOP_TC_ACTION_EN |
5074 					       EOP_TC_WB_ACTION_EN |
5075 					       EOP_TC_MD_ACTION_EN)) |
5076 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5077 				 EVENT_INDEX(5)));
5078 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5079 
5080 	/*
5081 	 * The address must be Qword aligned for a 64-bit write and Dword
5082 	 * aligned when only the low 32 bits are written (data high discarded).
5083 	 */
5084 	if (write64bit)
5085 		BUG_ON(addr & 0x7);
5086 	else
5087 		BUG_ON(addr & 0x3);
5088 	amdgpu_ring_write(ring, lower_32_bits(addr));
5089 	amdgpu_ring_write(ring, upper_32_bits(addr));
5090 	amdgpu_ring_write(ring, lower_32_bits(seq));
5091 	amdgpu_ring_write(ring, upper_32_bits(seq));
5092 	amdgpu_ring_write(ring, 0);
5093 }
5094 
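/*
 * Pipeline sync: poll this ring's own fence address with WAIT_REG_MEM
 * until it reaches sync_seq, stalling the PFP (gfx) or ME (compute)
 * until all previously submitted work has signalled.
 */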
5095 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5096 {
5097 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5098 	uint32_t seq = ring->fence_drv.sync_seq;
5099 	uint64_t addr = ring->fence_drv.gpu_addr;
5100 
5101 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5102 			      lower_32_bits(addr), upper_32_bits(addr),
5103 			      seq, 0xffffffff, 4);
5104 }
5105 
5106 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5107 					unsigned vmid, uint64_t pd_addr)
5108 {
5109 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5110 
5111 	/* compute doesn't have PFP */
5112 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5113 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5114 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5115 		amdgpu_ring_write(ring, 0x0);
5116 	}
5117 }
5118 
5119 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5120 {
5121 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5122 }
5123 
5124 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5125 {
5126 	u64 wptr;
5127 
5128 	/* XXX check if swapping is necessary on BE */
5129 	if (ring->use_doorbell)
5130 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5131 	else
5132 		BUG();
5133 	return wptr;
5134 }
5135 
5136 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5137 					   bool acquire)
5138 {
5139 	struct amdgpu_device *adev = ring->adev;
5140 	int pipe_num, tmp, reg;
5141 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5142 
5143 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5144 
5145 	/* first me only has 2 entries, GFX and HP3D */
5146 	if (ring->me > 0)
5147 		pipe_num -= 2;
5148 
5149 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5150 	tmp = RREG32(reg);
5151 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5152 	WREG32(reg, tmp);
5153 }
5154 
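/*
 * Track high-priority pipe reservations in pipe_reserve_bitmap and
 * adjust SPI_WCL_PIPE_PERCENT for every gfx/compute ring accordingly:
 * with no reservations every pipe gets the full percentage, otherwise
 * pipes without a reservation are throttled to the minimum.
 */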
5155 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5156 					    struct amdgpu_ring *ring,
5157 					    bool acquire)
5158 {
5159 	int i, pipe;
5160 	bool reserve;
5161 	struct amdgpu_ring *iring;
5162 
5163 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5164 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5165 	if (acquire)
5166 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5167 	else
5168 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5169 
5170 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5171 		/* Clear all reservations - everyone reacquires all resources */
5172 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5173 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5174 						       true);
5175 
5176 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5177 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5178 						       true);
5179 	} else {
5180 		/* Lower all pipes without a current reservation */
5181 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5182 			iring = &adev->gfx.gfx_ring[i];
5183 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5184 							   iring->me,
5185 							   iring->pipe,
5186 							   0);
5187 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5188 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5189 		}
5190 
5191 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5192 			iring = &adev->gfx.compute_ring[i];
5193 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5194 							   iring->me,
5195 							   iring->pipe,
5196 							   0);
5197 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5198 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5199 		}
5200 	}
5201 
5202 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5203 }
5204 
5205 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5206 				      struct amdgpu_ring *ring,
5207 				      bool acquire)
5208 {
5209 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5210 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5211 
5212 	mutex_lock(&adev->srbm_mutex);
5213 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5214 
5215 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5216 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5217 
5218 	soc15_grbm_select(adev, 0, 0, 0, 0);
5219 	mutex_unlock(&adev->srbm_mutex);
5220 }
5221 
5222 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5223 					       enum drm_sched_priority priority)
5224 {
5225 	struct amdgpu_device *adev = ring->adev;
5226 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5227 
5228 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5229 		return;
5230 
5231 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5232 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5233 }
5234 
5235 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5236 {
5237 	struct amdgpu_device *adev = ring->adev;
5238 
5239 	/* XXX check if swapping is necessary on BE */
5240 	if (ring->use_doorbell) {
5241 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5242 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5243 	} else {
5244 		BUG(); /* only DOORBELL method supported on gfx9 now */
5245 	}
5246 }
5247 
5248 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5249 					 u64 seq, unsigned int flags)
5250 {
5251 	struct amdgpu_device *adev = ring->adev;
5252 
5253 	/* we only allocate 32bit for each seq wb address */
5254 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5255 
5256 	/* write fence seq to the "addr" */
5257 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5258 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5259 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5260 	amdgpu_ring_write(ring, lower_32_bits(addr));
5261 	amdgpu_ring_write(ring, upper_32_bits(addr));
5262 	amdgpu_ring_write(ring, lower_32_bits(seq));
5263 
5264 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5265 		/* set register to trigger INT */
5266 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5267 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5268 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5269 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5270 		amdgpu_ring_write(ring, 0);
5271 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5272 	}
5273 }
5274 
5275 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5276 {
5277 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5278 	amdgpu_ring_write(ring, 0);
5279 }
5280 
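/*
 * Write a zero-initialized v9_ce_ib_state into the CE payload slot of
 * the CSA via WRITE_DATA; emitted for SR-IOV from
 * gfx_v9_ring_emit_cntxcntl() so the CE metadata starts from a known state.
 */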
5281 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5282 {
5283 	struct v9_ce_ib_state ce_payload = {0};
5284 	uint64_t csa_addr;
5285 	int cnt;
5286 
5287 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5288 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5289 
5290 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5291 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5292 				 WRITE_DATA_DST_SEL(8) |
5293 				 WR_CONFIRM) |
5294 				 WRITE_DATA_CACHE_POLICY(0));
5295 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5296 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5297 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5298 }
5299 
5300 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5301 {
5302 	struct v9_de_ib_state de_payload = {0};
5303 	uint64_t csa_addr, gds_addr;
5304 	int cnt;
5305 
5306 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5307 	gds_addr = csa_addr + 4096;
5308 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5309 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5310 
5311 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5312 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5313 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5314 				 WRITE_DATA_DST_SEL(8) |
5315 				 WR_CONFIRM) |
5316 				 WRITE_DATA_CACHE_POLICY(0));
5317 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5318 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5319 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5320 }
5321 
5322 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5323 {
5324 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5325 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5326 }
5327 
5328 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5329 {
5330 	uint32_t dw2 = 0;
5331 
5332 	if (amdgpu_sriov_vf(ring->adev))
5333 		gfx_v9_0_ring_emit_ce_meta(ring);
5334 
5335 	gfx_v9_0_ring_emit_tmz(ring, true);
5336 
5337 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5338 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5339 		/* set load_global_config & load_global_uconfig */
5340 		dw2 |= 0x8001;
5341 		/* set load_cs_sh_regs */
5342 		dw2 |= 0x01000000;
5343 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5344 		dw2 |= 0x10002;
5345 
5346 		/* set load_ce_ram if preamble presented */
5347 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5348 			dw2 |= 0x10000000;
5349 	} else {
5350 		/* still load_ce_ram if this is the first time a preamble is
5351 		 * presented, even though no context switch happens.
5352 		 */
5353 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5354 			dw2 |= 0x10000000;
5355 	}
5356 
5357 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5358 	amdgpu_ring_write(ring, dw2);
5359 	amdgpu_ring_write(ring, 0);
5360 }
5361 
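/*
 * Emit a COND_EXEC packet with a placeholder dword count and return its
 * ring offset; gfx_v9_0_ring_emit_patch_cond_exec() later patches in the
 * number of dwords the CP should skip when *cond_exe_gpu_addr is zero.
 */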
5362 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5363 {
5364 	unsigned ret;
5365 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5366 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5367 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5368 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5369 	ret = ring->wptr & ring->buf_mask;
5370 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5371 	return ret;
5372 }
5373 
5374 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5375 {
5376 	unsigned cur;
5377 	BUG_ON(offset > ring->buf_mask);
5378 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5379 
5380 	cur = (ring->wptr & ring->buf_mask) - 1;
5381 	if (likely(cur > offset))
5382 		ring->ring[offset] = cur - offset;
5383 	else
5384 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5385 }
5386 
5387 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5388 {
5389 	struct amdgpu_device *adev = ring->adev;
5390 
5391 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5392 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5393 				(5 << 8) |	/* dst: memory */
5394 				(1 << 20));	/* write confirm */
5395 	amdgpu_ring_write(ring, reg);
5396 	amdgpu_ring_write(ring, 0);
5397 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5398 				adev->virt.reg_val_offs * 4));
5399 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5400 				adev->virt.reg_val_offs * 4));
5401 }
5402 
5403 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5404 				    uint32_t val)
5405 {
5406 	uint32_t cmd = 0;
5407 
5408 	switch (ring->funcs->type) {
5409 	case AMDGPU_RING_TYPE_GFX:
5410 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5411 		break;
5412 	case AMDGPU_RING_TYPE_KIQ:
5413 		cmd = (1 << 16); /* no inc addr */
5414 		break;
5415 	default:
5416 		cmd = WR_CONFIRM;
5417 		break;
5418 	}
5419 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5420 	amdgpu_ring_write(ring, cmd);
5421 	amdgpu_ring_write(ring, reg);
5422 	amdgpu_ring_write(ring, 0);
5423 	amdgpu_ring_write(ring, val);
5424 }
5425 
5426 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5427 					uint32_t val, uint32_t mask)
5428 {
5429 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5430 }
5431 
5432 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5433 						  uint32_t reg0, uint32_t reg1,
5434 						  uint32_t ref, uint32_t mask)
5435 {
5436 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5437 	struct amdgpu_device *adev = ring->adev;
5438 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5439 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5440 
5441 	if (fw_version_ok)
5442 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5443 				      ref, mask, 0x20);
5444 	else
5445 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5446 							   ref, mask);
5447 }
5448 
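/*
 * Soft recovery: issue an SQ_CMD targeting only waves that belong to the
 * given VMID, attempting to unblock a hung ring without a full GPU reset.
 */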
5449 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5450 {
5451 	struct amdgpu_device *adev = ring->adev;
5452 	uint32_t value = 0;
5453 
5454 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5455 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5456 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5457 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5458 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5459 }
5460 
5461 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5462 						 enum amdgpu_interrupt_state state)
5463 {
5464 	switch (state) {
5465 	case AMDGPU_IRQ_STATE_DISABLE:
5466 	case AMDGPU_IRQ_STATE_ENABLE:
5467 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5468 			       TIME_STAMP_INT_ENABLE,
5469 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5470 		break;
5471 	default:
5472 		break;
5473 	}
5474 }
5475 
5476 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5477 						     int me, int pipe,
5478 						     enum amdgpu_interrupt_state state)
5479 {
5480 	u32 mec_int_cntl, mec_int_cntl_reg;
5481 
5482 	/*
5483 	 * amdgpu controls only the first MEC. That's why this function only
5484 	 * handles the setting of interrupts for this specific MEC. All other
5485 	 * pipes' interrupts are set by amdkfd.
5486 	 */
5487 
5488 	if (me == 1) {
5489 		switch (pipe) {
5490 		case 0:
5491 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5492 			break;
5493 		case 1:
5494 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5495 			break;
5496 		case 2:
5497 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5498 			break;
5499 		case 3:
5500 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5501 			break;
5502 		default:
5503 			DRM_DEBUG("invalid pipe %d\n", pipe);
5504 			return;
5505 		}
5506 	} else {
5507 		DRM_DEBUG("invalid me %d\n", me);
5508 		return;
5509 	}
5510 
5511 	switch (state) {
5512 	case AMDGPU_IRQ_STATE_DISABLE:
5513 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5514 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5515 					     TIME_STAMP_INT_ENABLE, 0);
5516 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5517 		break;
5518 	case AMDGPU_IRQ_STATE_ENABLE:
5519 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5520 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5521 					     TIME_STAMP_INT_ENABLE, 1);
5522 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5523 		break;
5524 	default:
5525 		break;
5526 	}
5527 }
5528 
5529 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5530 					     struct amdgpu_irq_src *source,
5531 					     unsigned type,
5532 					     enum amdgpu_interrupt_state state)
5533 {
5534 	switch (state) {
5535 	case AMDGPU_IRQ_STATE_DISABLE:
5536 	case AMDGPU_IRQ_STATE_ENABLE:
5537 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5538 			       PRIV_REG_INT_ENABLE,
5539 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5540 		break;
5541 	default:
5542 		break;
5543 	}
5544 
5545 	return 0;
5546 }
5547 
5548 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5549 					      struct amdgpu_irq_src *source,
5550 					      unsigned type,
5551 					      enum amdgpu_interrupt_state state)
5552 {
5553 	switch (state) {
5554 	case AMDGPU_IRQ_STATE_DISABLE:
5555 	case AMDGPU_IRQ_STATE_ENABLE:
5556 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5557 			       PRIV_INSTR_INT_ENABLE,
5558 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5559 	default:
5560 		break;
5561 	}
5562 
5563 	return 0;
5564 }
5565 
5566 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5567 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5568 			CP_ECC_ERROR_INT_ENABLE, 1)
5569 
5570 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5571 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5572 			CP_ECC_ERROR_INT_ENABLE, 0)
5573 
5574 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5575 					      struct amdgpu_irq_src *source,
5576 					      unsigned type,
5577 					      enum amdgpu_interrupt_state state)
5578 {
5579 	switch (state) {
5580 	case AMDGPU_IRQ_STATE_DISABLE:
5581 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5582 				CP_ECC_ERROR_INT_ENABLE, 0);
5583 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5584 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5585 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5586 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5587 		break;
5588 
5589 	case AMDGPU_IRQ_STATE_ENABLE:
5590 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5591 				CP_ECC_ERROR_INT_ENABLE, 1);
5592 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5593 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5594 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5595 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5596 		break;
5597 	default:
5598 		break;
5599 	}
5600 
5601 	return 0;
5602 }
5603 
5604 
5605 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5606 					    struct amdgpu_irq_src *src,
5607 					    unsigned type,
5608 					    enum amdgpu_interrupt_state state)
5609 {
5610 	switch (type) {
5611 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5612 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5613 		break;
5614 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5615 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5616 		break;
5617 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5618 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5619 		break;
5620 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5621 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5622 		break;
5623 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5624 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5625 		break;
5626 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5627 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5628 		break;
5629 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5630 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5631 		break;
5632 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5633 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5634 		break;
5635 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5636 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5637 		break;
5638 	default:
5639 		break;
5640 	}
5641 	return 0;
5642 }
5643 
5644 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5645 			    struct amdgpu_irq_src *source,
5646 			    struct amdgpu_iv_entry *entry)
5647 {
5648 	int i;
5649 	u8 me_id, pipe_id, queue_id;
5650 	struct amdgpu_ring *ring;
5651 
5652 	DRM_DEBUG("IH: CP EOP\n");
5653 	me_id = (entry->ring_id & 0x0c) >> 2;
5654 	pipe_id = (entry->ring_id & 0x03) >> 0;
5655 	queue_id = (entry->ring_id & 0x70) >> 4;
5656 
5657 	switch (me_id) {
5658 	case 0:
5659 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5660 		break;
5661 	case 1:
5662 	case 2:
5663 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5664 			ring = &adev->gfx.compute_ring[i];
5665 			/* Per-queue interrupt is supported for MEC starting from VI.
5666 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5667 			 */
5668 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5669 				amdgpu_fence_process(ring);
5670 		}
5671 		break;
5672 	}
5673 	return 0;
5674 }
5675 
5676 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5677 			   struct amdgpu_iv_entry *entry)
5678 {
5679 	u8 me_id, pipe_id, queue_id;
5680 	struct amdgpu_ring *ring;
5681 	int i;
5682 
5683 	me_id = (entry->ring_id & 0x0c) >> 2;
5684 	pipe_id = (entry->ring_id & 0x03) >> 0;
5685 	queue_id = (entry->ring_id & 0x70) >> 4;
5686 
5687 	switch (me_id) {
5688 	case 0:
5689 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5690 		break;
5691 	case 1:
5692 	case 2:
5693 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5694 			ring = &adev->gfx.compute_ring[i];
5695 			if (ring->me == me_id && ring->pipe == pipe_id &&
5696 			    ring->queue == queue_id)
5697 				drm_sched_fault(&ring->sched);
5698 		}
5699 		break;
5700 	}
5701 }
5702 
5703 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5704 				 struct amdgpu_irq_src *source,
5705 				 struct amdgpu_iv_entry *entry)
5706 {
5707 	DRM_ERROR("Illegal register access in command stream\n");
5708 	gfx_v9_0_fault(adev, entry);
5709 	return 0;
5710 }
5711 
5712 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5713 				  struct amdgpu_irq_src *source,
5714 				  struct amdgpu_iv_entry *entry)
5715 {
5716 	DRM_ERROR("Illegal instruction in command stream\n");
5717 	gfx_v9_0_fault(adev, entry);
5718 	return 0;
5719 }
5720 
5721 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5722 		struct ras_err_data *err_data,
5723 		struct amdgpu_iv_entry *entry)
5724 {
5725 	/* TODO ue will trigger an interrupt. */
5726 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5727 	if (adev->gfx.funcs->query_ras_error_count)
5728 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5729 	amdgpu_ras_reset_gpu(adev, 0);
5730 	return AMDGPU_RAS_SUCCESS;
5731 }
5732 
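/*
 * Table of GFX EDC error counter registers: each entry gives the
 * register, whether it is instanced per SE, how many instances to walk,
 * and the SEC/DED (single/double error count) field masks used when
 * querying RAS error counts.
 */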
5733 static const struct {
5734 	const char *name;
5735 	uint32_t ip;
5736 	uint32_t inst;
5737 	uint32_t seg;
5738 	uint32_t reg_offset;
5739 	uint32_t per_se_instance;
5740 	int32_t num_instance;
5741 	uint32_t sec_count_mask;
5742 	uint32_t ded_count_mask;
5743 } gfx_ras_edc_regs[] = {
5744 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5745 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5746 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5747 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5748 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5749 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5750 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5751 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5752 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5753 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5754 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5755 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5756 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5757 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5758 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5759 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5760 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5761 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5762 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5763 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5764 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5765 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5766 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5767 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5768 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5769 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5770 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5771 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5772 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5773 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5774 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5775 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5776 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5777 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5778 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5779 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5780 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5781 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5782 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5783 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5784 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5785 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5786 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5787 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5788 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5789 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5790 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5791 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5792 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5793 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5794 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5795 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5796 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5797 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5798 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5799 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5800 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5801 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5802 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5803 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5804 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5805 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5806 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5807 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5808 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5809 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5810 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5811 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5812 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5813 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5814 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5815 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5816 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5817 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5818 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5819 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5820 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5821 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5822 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5823 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5824 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5825 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5826 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5827 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5828 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5829 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5830 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5831 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5832 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5833 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5834 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5835 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5836 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5837 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5838 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5839 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5840 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5841 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5842 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5843 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5844 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5845 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5846 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5847 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5848 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5849 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5850 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5851 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5852 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5853 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5854 	  0 },
5855 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5856 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5857 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5858 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5859 	  0 },
5860 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5861 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5862 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5863 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5864 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5865 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5866 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5867 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5868 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5869 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5870 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5871 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5872 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5873 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5874 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5875 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5876 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5877 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5878 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5879 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5880 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5881 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5882 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5883 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5884 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5885 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5886 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5887 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5888 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5889 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5890 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5891 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5892 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5893 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5894 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5895 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5896 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5897 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5898 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5899 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5900 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5901 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5902 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5903 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5904 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5905 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5906 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5907 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5908 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5909 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5910 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5911 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5912 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5913 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5914 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5915 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5916 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5917 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5918 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5919 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5920 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5921 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5922 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5923 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5924 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5925 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5926 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5927 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5928 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5929 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5930 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5931 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5932 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5933 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5934 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5935 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5936 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5937 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5938 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5939 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5940 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5941 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5942 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5943 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5944 	  0 },
5945 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5946 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5947 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5948 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5949 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5950 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5951 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5952 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5953 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5954 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5955 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5956 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5957 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5958 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5959 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5960 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5961 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5962 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5963 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5964 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5965 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5966 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5967 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5968 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5969 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5970 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5971 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5972 	  0 },
5973 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5974 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5975 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5976 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5977 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5978 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5979 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5980 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5981 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5982 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5983 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5984 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5985 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5986 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5987 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5988 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5989 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5990 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5991 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5992 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5993 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5994 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5995 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5996 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5997 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5998 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5999 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6000 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6001 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6002 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6003 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6004 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6005 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6006 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6007 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6008 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6009 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6010 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6011 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6012 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6013 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6014 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6015 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6016 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6017 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6018 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6019 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6020 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6021 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6022 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6023 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6024 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6025 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6026 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6027 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6028 };
6029 
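/*
 * gfx_v9_0_ras_error_inject - request a GFX RAS error injection
 *
 * Validates the requested sub-block index against ras_gfx_subblocks and
 * checks that both the hardware and the driver support the requested error
 * type, then forwards the request to the PSP RAS TA via
 * psp_ras_trigger_error().  Only CHIP_VEGA20 is accepted.
 */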
6030 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6031 				     void *inject_if)
6032 {
6033 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6034 	int ret;
6035 	struct ta_ras_trigger_error_input block_info = { 0 };
6036 
6037 	if (adev->asic_type != CHIP_VEGA20)
6038 		return -EINVAL;
6039 
6040 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6041 		return -EINVAL;
6042 
6043 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6044 		return -EPERM;
6045 
6046 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6047 	      info->head.type)) {
6048 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6049 			ras_gfx_subblocks[info->head.sub_block_index].name,
6050 			info->head.type);
6051 		return -EPERM;
6052 	}
6053 
6054 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6055 	      info->head.type)) {
6056 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6057 			ras_gfx_subblocks[info->head.sub_block_index].name,
6058 			info->head.type);
6059 		return -EPERM;
6060 	}
6061 
6062 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6063 	block_info.sub_block_index =
6064 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6065 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6066 	block_info.address = info->address;
6067 	block_info.value = info->value;
6068 
6069 	mutex_lock(&adev->grbm_idx_mutex);
6070 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6071 	mutex_unlock(&adev->grbm_idx_mutex);
6072 
6073 	return ret;
6074 }
6075 
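/*
 * gfx_v9_0_query_ras_error_count - collect the GFX EDC error counters
 *
 * Walks gfx_ras_edc_regs for every shader engine and register instance,
 * selects the matching SE/instance through GRBM and reads each EDC counter
 * register.  Non-zero SEC fields are reported as correctable errors and
 * non-zero DED fields as uncorrectable errors in ras_error_status.
 * Only CHIP_VEGA20 is accepted.
 */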
6076 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6077 					  void *ras_error_status)
6078 {
6079 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6080 	uint32_t sec_count, ded_count;
6081 	uint32_t i;
6082 	uint32_t reg_value;
6083 	uint32_t se_id, instance_id;
6084 
6085 	if (adev->asic_type != CHIP_VEGA20)
6086 		return -EINVAL;
6087 
6088 	err_data->ue_count = 0;
6089 	err_data->ce_count = 0;
6090 
6091 	mutex_lock(&adev->grbm_idx_mutex);
6092 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6093 		for (instance_id = 0; instance_id < 256; instance_id++) {
6094 			for (i = 0; i < ARRAY_SIZE(gfx_ras_edc_regs); i++) {
6097 				if (se_id != 0 &&
6098 				    !gfx_ras_edc_regs[i].per_se_instance)
6099 					continue;
6100 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6101 					continue;
6102 
6103 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6104 						      instance_id);
6105 
6106 				reg_value = RREG32(
6107 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6108 							[gfx_ras_edc_regs[i].inst]
6109 							[gfx_ras_edc_regs[i].seg] +
6110 					gfx_ras_edc_regs[i].reg_offset);
6111 				sec_count = reg_value &
6112 					    gfx_ras_edc_regs[i].sec_count_mask;
6113 				ded_count = reg_value &
6114 					    gfx_ras_edc_regs[i].ded_count_mask;
6115 				if (sec_count) {
6116 					DRM_INFO(
6117 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6118 						se_id, instance_id,
6119 						gfx_ras_edc_regs[i].name,
6120 						sec_count);
6121 					err_data->ce_count++;
6122 				}
6123 
6124 				if (ded_count) {
6125 					DRM_INFO(
6126 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6127 						se_id, instance_id,
6128 						gfx_ras_edc_regs[i].name,
6129 						ded_count);
6130 					err_data->ue_count++;
6131 				}
6132 			}
6133 		}
6134 	}
6135 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6136 	mutex_unlock(&adev->grbm_idx_mutex);
6137 
6138 	return 0;
6139 }
6140 
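/*
 * gfx_v9_0_cp_ecc_error_irq - CP ECC error interrupt handler
 *
 * Hands the interrupt vector entry over to the RAS core through
 * amdgpu_ras_interrupt_dispatch() when a GFX RAS context is registered.
 */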
6141 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6142 				  struct amdgpu_irq_src *source,
6143 				  struct amdgpu_iv_entry *entry)
6144 {
6145 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6146 	struct ras_dispatch_if ih_data = {
6147 		.entry = entry,
6148 	};
6149 
6150 	if (!ras_if)
6151 		return 0;
6152 
6153 	ih_data.head = *ras_if;
6154 
6155 	DRM_ERROR("CP ECC ERROR IRQ\n");
6156 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6157 	return 0;
6158 }
6159 
6160 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6161 	.name = "gfx_v9_0",
6162 	.early_init = gfx_v9_0_early_init,
6163 	.late_init = gfx_v9_0_late_init,
6164 	.sw_init = gfx_v9_0_sw_init,
6165 	.sw_fini = gfx_v9_0_sw_fini,
6166 	.hw_init = gfx_v9_0_hw_init,
6167 	.hw_fini = gfx_v9_0_hw_fini,
6168 	.suspend = gfx_v9_0_suspend,
6169 	.resume = gfx_v9_0_resume,
6170 	.is_idle = gfx_v9_0_is_idle,
6171 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6172 	.soft_reset = gfx_v9_0_soft_reset,
6173 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6174 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6175 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6176 };
6177 
6178 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6179 	.type = AMDGPU_RING_TYPE_GFX,
6180 	.align_mask = 0xff,
6181 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6182 	.support_64bit_ptrs = true,
6183 	.vmhub = AMDGPU_GFXHUB_0,
6184 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6185 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6186 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6187 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6188 		5 +  /* COND_EXEC */
6189 		7 +  /* PIPELINE_SYNC */
6190 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6191 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6192 		2 + /* VM_FLUSH */
6193 		8 +  /* FENCE for VM_FLUSH */
6194 		20 + /* GDS switch */
6195 		4 + /* double SWITCH_BUFFER,
6196 		       the first COND_EXEC jumps to the place just
6197 		       prior to this double SWITCH_BUFFER */
6198 		5 + /* COND_EXEC */
6199 		7 +	 /*	HDP_flush */
6200 		4 +	 /*	VGT_flush */
6201 		14 + /*	CE_META */
6202 		31 + /*	DE_META */
6203 		3 + /* CNTX_CTRL */
6204 		5 + /* HDP_INVL */
6205 		8 + 8 + /* FENCE x2 */
6206 		2, /* SWITCH_BUFFER */
6207 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6208 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6209 	.emit_fence = gfx_v9_0_ring_emit_fence,
6210 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6211 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6212 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6213 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6214 	.test_ring = gfx_v9_0_ring_test_ring,
6215 	.test_ib = gfx_v9_0_ring_test_ib,
6216 	.insert_nop = amdgpu_ring_insert_nop,
6217 	.pad_ib = amdgpu_ring_generic_pad_ib,
6218 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6219 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6220 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6221 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6222 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6223 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6224 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6225 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6226 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6227 };
6228 
6229 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6230 	.type = AMDGPU_RING_TYPE_COMPUTE,
6231 	.align_mask = 0xff,
6232 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6233 	.support_64bit_ptrs = true,
6234 	.vmhub = AMDGPU_GFXHUB_0,
6235 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6236 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6237 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6238 	.emit_frame_size =
6239 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6240 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6241 		5 + /* hdp invalidate */
6242 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6243 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6244 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6245 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6246 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6247 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6248 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6249 	.emit_fence = gfx_v9_0_ring_emit_fence,
6250 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6251 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6252 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6253 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6254 	.test_ring = gfx_v9_0_ring_test_ring,
6255 	.test_ib = gfx_v9_0_ring_test_ib,
6256 	.insert_nop = amdgpu_ring_insert_nop,
6257 	.pad_ib = amdgpu_ring_generic_pad_ib,
6258 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6259 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6260 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6261 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6262 };
6263 
6264 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6265 	.type = AMDGPU_RING_TYPE_KIQ,
6266 	.align_mask = 0xff,
6267 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6268 	.support_64bit_ptrs = true,
6269 	.vmhub = AMDGPU_GFXHUB_0,
6270 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6271 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6272 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6273 	.emit_frame_size =
6274 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6275 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6276 		5 + /* hdp invalidate */
6277 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6278 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6279 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6280 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6281 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6282 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6283 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6284 	.test_ring = gfx_v9_0_ring_test_ring,
6285 	.insert_nop = amdgpu_ring_insert_nop,
6286 	.pad_ib = amdgpu_ring_generic_pad_ib,
6287 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6288 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6289 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6290 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6291 };
6292 
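/* Attach the KIQ, GFX and compute ring function tables to their rings. */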
6293 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6294 {
6295 	int i;
6296 
6297 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6298 
6299 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6300 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6301 
6302 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6303 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6304 }
6305 
6306 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6307 	.set = gfx_v9_0_set_eop_interrupt_state,
6308 	.process = gfx_v9_0_eop_irq,
6309 };
6310 
6311 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6312 	.set = gfx_v9_0_set_priv_reg_fault_state,
6313 	.process = gfx_v9_0_priv_reg_irq,
6314 };
6315 
6316 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6317 	.set = gfx_v9_0_set_priv_inst_fault_state,
6318 	.process = gfx_v9_0_priv_inst_irq,
6319 };
6320 
6321 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6322 	.set = gfx_v9_0_set_cp_ecc_error_state,
6323 	.process = gfx_v9_0_cp_ecc_error_irq,
6324 };
6325 
6326 
6327 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6328 {
6329 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6330 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6331 
6332 	adev->gfx.priv_reg_irq.num_types = 1;
6333 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6334 
6335 	adev->gfx.priv_inst_irq.num_types = 1;
6336 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6337 
6338 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6339 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6340 }
6341 
6342 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6343 {
6344 	switch (adev->asic_type) {
6345 	case CHIP_VEGA10:
6346 	case CHIP_VEGA12:
6347 	case CHIP_VEGA20:
6348 	case CHIP_RAVEN:
6349 	case CHIP_ARCTURUS:
6350 	case CHIP_RENOIR:
6351 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6352 		break;
6353 	default:
6354 		break;
6355 	}
6356 }
6357 
6358 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6359 {
6360 	/* init asic gds info */
6361 	switch (adev->asic_type) {
6362 	case CHIP_VEGA10:
6363 	case CHIP_VEGA12:
6364 	case CHIP_VEGA20:
6365 		adev->gds.gds_size = 0x10000;
6366 		break;
6367 	case CHIP_RAVEN:
6368 	case CHIP_ARCTURUS:
6369 		adev->gds.gds_size = 0x1000;
6370 		break;
6371 	default:
6372 		adev->gds.gds_size = 0x10000;
6373 		break;
6374 	}
6375 
6376 	switch (adev->asic_type) {
6377 	case CHIP_VEGA10:
6378 	case CHIP_VEGA20:
6379 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6380 		break;
6381 	case CHIP_VEGA12:
6382 		adev->gds.gds_compute_max_wave_id = 0x27f;
6383 		break;
6384 	case CHIP_RAVEN:
6385 		if (adev->rev_id >= 0x8)
6386 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6387 		else
6388 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6389 		break;
6390 	case CHIP_ARCTURUS:
6391 		adev->gds.gds_compute_max_wave_id = 0xfff;
6392 		break;
6393 	default:
6394 		/* this really depends on the chip */
6395 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6396 		break;
6397 	}
6398 
6399 	adev->gds.gws_size = 64;
6400 	adev->gds.oa_size = 16;
6401 }
6402 
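/*
 * Program the user-requested inactive CU bitmap for the currently selected
 * shader array into GC_USER_SHADER_ARRAY_CONFIG.
 */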
6403 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6404 						 u32 bitmap)
6405 {
6406 	u32 data;
6407 
6408 	if (!bitmap)
6409 		return;
6410 
6411 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6412 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6413 
6414 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6415 }
6416 
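/*
 * Return the bitmap of active CUs in the currently selected shader array:
 * CUs disabled in hardware or by the user bitmap are masked out.
 */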
6417 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6418 {
6419 	u32 data, mask;
6420 
6421 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6422 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6423 
6424 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6425 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6426 
6427 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6428 
6429 	return (~data) & mask;
6430 }
6431 
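/*
 * gfx_v9_0_get_cu_info - fill an amdgpu_cu_info structure
 *
 * Applies the user CU disable masks, reads back the per-SE/SH active CU
 * bitmaps and derives the total active CU count, the always-on (ao) CU
 * mask and the per-array bitmaps.
 */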
6432 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6433 				 struct amdgpu_cu_info *cu_info)
6434 {
6435 	int i, j, k, counter, active_cu_number = 0;
6436 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6437 	unsigned disable_masks[4 * 4];
6438 
6439 	if (!adev || !cu_info)
6440 		return -EINVAL;
6441 
6442 	/*
6443 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6444 	 */
6445 	if (adev->gfx.config.max_shader_engines *
6446 		adev->gfx.config.max_sh_per_se > 16)
6447 		return -EINVAL;
6448 
6449 	amdgpu_gfx_parse_disable_cu(disable_masks,
6450 				    adev->gfx.config.max_shader_engines,
6451 				    adev->gfx.config.max_sh_per_se);
6452 
6453 	mutex_lock(&adev->grbm_idx_mutex);
6454 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6455 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6456 			mask = 1;
6457 			ao_bitmap = 0;
6458 			counter = 0;
6459 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6460 			gfx_v9_0_set_user_cu_inactive_bitmap(
6461 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6462 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6463 
6464 			/*
6465 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6466 			 * a 4x4 array, which suits Vega ASICs with their 4*2
6467 			 * SE/SH layout.
6468 			 * For Arcturus, however, the SE/SH layout changes to 8*1.
6469 			 * To minimize the impact, we keep it compatible with the
6470 			 * current bitmap array as below:
6471 			 *    SE4,SH0 --> bitmap[0][1]
6472 			 *    SE5,SH0 --> bitmap[1][1]
6473 			 *    SE6,SH0 --> bitmap[2][1]
6474 			 *    SE7,SH0 --> bitmap[3][1]
6475 			 */
6476 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6477 
6478 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6479 				if (bitmap & mask) {
6480 					if (counter < adev->gfx.config.max_cu_per_sh)
6481 						ao_bitmap |= mask;
6482 					counter++;
6483 				}
6484 				mask <<= 1;
6485 			}
6486 			active_cu_number += counter;
6487 			if (i < 2 && j < 2)
6488 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6489 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6490 		}
6491 	}
6492 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6493 	mutex_unlock(&adev->grbm_idx_mutex);
6494 
6495 	cu_info->number = active_cu_number;
6496 	cu_info->ao_cu_mask = ao_cu_mask;
6497 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6498 
6499 	return 0;
6500 }
6501 
6502 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6503 {
6504 	.type = AMD_IP_BLOCK_TYPE_GFX,
6505 	.major = 9,
6506 	.minor = 0,
6507 	.rev = 0,
6508 	.funcs = &gfx_v9_0_ip_funcs,
6509 };
6510