1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
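/*
 * Local definitions for the PWR_MISC_CNTL_STATUS register and its
 * GFXOFF-related fields.
 */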
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
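/*
 * Arcturus-specific TCP_CHAN_STEER register offsets, referenced by the
 * Arcturus golden settings below.
 */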
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
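/*
 * GFX sub-block indices as understood by the RAS TA firmware.  The driver's
 * AMDGPU_RAS_BLOCK__GFX_* sub-blocks are translated to these values (via
 * ras_gfx_subblocks[] below) when injecting RAS errors.
 */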
134 enum ta_ras_gfx_subblock {
135 	/*CPC*/
136 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPC_UCODE,
139 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	/* CPF*/
147 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 	TA_RAS_BLOCK__GFX_CPF_TAG,
151 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 	/* CPG*/
153 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 	TA_RAS_BLOCK__GFX_CPG_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 	/* GDS*/
159 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	/* SPI*/
167 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 	/* SQ*/
169 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 	/* SQC (3 ranges)*/
176 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 	/* SQC range 0*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	/* SQC range 1*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	/* SQC range 2*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 	/* TA*/
219 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	/* TCA*/
227 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	/* TCC (5 sub-ranges)*/
232 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 	/* TCC range 0*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	/* TCC range 1*/
245 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	/* TCC range 2*/
251 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	/* TCC range 3*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	/* TCC range 4*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 	/* TCI*/
277 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 	/* TCP*/
279 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	/* TD*/
289 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	/* EA (3 sub-ranges)*/
295 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 	/* EA range 0*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	/* EA range 1*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	/* EA range 2*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 	/* UTC VM L2 bank*/
326 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 	/* UTC VM walker*/
328 	TA_RAS_BLOCK__UTC_VML2_WALKER,
329 	/* UTC ATC L2 2MB cache*/
330 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 	/* UTC ATC L2 4KB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 	TA_RAS_BLOCK__GFX_MAX
334 };
335 
336 struct ras_gfx_subblock {
337 	unsigned char *name;
338 	int ta_subblock;
339 	int hw_supported_error_type;
340 	int sw_supported_error_type;
341 };
342 
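/*
 * Build one ras_gfx_subblocks[] entry: the stringified sub-block name, the
 * matching RAS TA index, and two bitmasks assembled from the flags a-d
 * (hardware-supported error types) and e-h (software-supported error types).
 */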
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345 		#subblock,                                                     \
346 		TA_RAS_BLOCK__##subblock,                                      \
347 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349 	}
350 
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369 			     0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 			     0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382 			     0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 			     0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 			     0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392 			     1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 			     0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436 			     1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457 			     0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 			     0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464 			     0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500 
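/*
 * Per-ASIC "golden" register settings: (register, mask, value) triples that
 * gfx_v9_0_init_golden_registers() programs during hardware init.
 */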
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
523 };
524 
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546 
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561 
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
588 };
589 
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600 
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623 
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646 
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666 
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
682 };
683 
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 };
695 
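/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register instances relative to
 * instance 0.
 */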
696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
697 {
698 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
699 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
700 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 };
707 
708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
709 {
710 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
711 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
712 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 };
719 
720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
724 
725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
730                                  struct amdgpu_cu_info *cu_info);
731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
736 					  void *ras_error_status);
737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
738 				     void *inject_if);
739 
740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
741 {
742 	switch (adev->asic_type) {
743 	case CHIP_VEGA10:
744 		soc15_program_register_sequence(adev,
745 						golden_settings_gc_9_0,
746 						ARRAY_SIZE(golden_settings_gc_9_0));
747 		soc15_program_register_sequence(adev,
748 						golden_settings_gc_9_0_vg10,
749 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
750 		break;
751 	case CHIP_VEGA12:
752 		soc15_program_register_sequence(adev,
753 						golden_settings_gc_9_2_1,
754 						ARRAY_SIZE(golden_settings_gc_9_2_1));
755 		soc15_program_register_sequence(adev,
756 						golden_settings_gc_9_2_1_vg12,
757 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
758 		break;
759 	case CHIP_VEGA20:
760 		soc15_program_register_sequence(adev,
761 						golden_settings_gc_9_0,
762 						ARRAY_SIZE(golden_settings_gc_9_0));
763 		soc15_program_register_sequence(adev,
764 						golden_settings_gc_9_0_vg20,
765 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
766 		break;
767 	case CHIP_ARCTURUS:
768 		soc15_program_register_sequence(adev,
769 						golden_settings_gc_9_4_1_arct,
770 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
771 		break;
772 	case CHIP_RAVEN:
773 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
774 						ARRAY_SIZE(golden_settings_gc_9_1));
775 		if (adev->rev_id >= 8)
776 			soc15_program_register_sequence(adev,
777 							golden_settings_gc_9_1_rv2,
778 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
779 		else
780 			soc15_program_register_sequence(adev,
781 							golden_settings_gc_9_1_rv1,
782 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
783 		break;
784 	case CHIP_RENOIR:
785 		soc15_program_register_sequence(adev,
786 						golden_settings_gc_9_1_rn,
787 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
788 		return; /* Renoir does not need the common golden settings */
789 	default:
790 		break;
791 	}
792 
793 	if (adev->asic_type != CHIP_ARCTURUS)
794 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
795 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
796 }
797 
798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
799 {
800 	adev->gfx.scratch.num_reg = 8;
801 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
802 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
803 }
804 
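/*
 * Emit a WRITE_DATA packet that writes @val to the register at offset @reg.
 * @eng_sel selects the CP engine and @wc requests a write confirmation.
 */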
805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
806 				       bool wc, uint32_t reg, uint32_t val)
807 {
808 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
809 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
810 				WRITE_DATA_DST_SEL(0) |
811 				(wc ? WR_CONFIRM : 0));
812 	amdgpu_ring_write(ring, reg);
813 	amdgpu_ring_write(ring, 0);
814 	amdgpu_ring_write(ring, val);
815 }
816 
817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
818 				  int mem_space, int opt, uint32_t addr0,
819 				  uint32_t addr1, uint32_t ref, uint32_t mask,
820 				  uint32_t inv)
821 {
822 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
823 	amdgpu_ring_write(ring,
824 				 /* memory (1) or register (0) */
825 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
826 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
827 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
828 				 WAIT_REG_MEM_ENGINE(eng_sel)));
829 
830 	if (mem_space)
831 		BUG_ON(addr0 & 0x3); /* Dword align */
832 	amdgpu_ring_write(ring, addr0);
833 	amdgpu_ring_write(ring, addr1);
834 	amdgpu_ring_write(ring, ref);
835 	amdgpu_ring_write(ring, mask);
836 	amdgpu_ring_write(ring, inv); /* poll interval */
837 }
838 
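/*
 * Basic ring test: push a write of 0xDEADBEEF to a scratch register through
 * the ring and poll the register until the value appears or we time out.
 */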
839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
840 {
841 	struct amdgpu_device *adev = ring->adev;
842 	uint32_t scratch;
843 	uint32_t tmp = 0;
844 	unsigned i;
845 	int r;
846 
847 	r = amdgpu_gfx_scratch_get(adev, &scratch);
848 	if (r)
849 		return r;
850 
851 	WREG32(scratch, 0xCAFEDEAD);
852 	r = amdgpu_ring_alloc(ring, 3);
853 	if (r)
854 		goto error_free_scratch;
855 
856 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
857 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
858 	amdgpu_ring_write(ring, 0xDEADBEEF);
859 	amdgpu_ring_commit(ring);
860 
861 	for (i = 0; i < adev->usec_timeout; i++) {
862 		tmp = RREG32(scratch);
863 		if (tmp == 0xDEADBEEF)
864 			break;
865 		udelay(1);
866 	}
867 
868 	if (i >= adev->usec_timeout)
869 		r = -ETIMEDOUT;
870 
871 error_free_scratch:
872 	amdgpu_gfx_scratch_free(adev, scratch);
873 	return r;
874 }
875 
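/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot and verify the value once the fence signals.
 */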
876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 {
878 	struct amdgpu_device *adev = ring->adev;
879 	struct amdgpu_ib ib;
880 	struct dma_fence *f = NULL;
881 
882 	unsigned index;
883 	uint64_t gpu_addr;
884 	uint32_t tmp;
885 	long r;
886 
887 	r = amdgpu_device_wb_get(adev, &index);
888 	if (r)
889 		return r;
890 
891 	gpu_addr = adev->wb.gpu_addr + (index * 4);
892 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
893 	memset(&ib, 0, sizeof(ib));
894 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
895 	if (r)
896 		goto err1;
897 
898 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
899 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
900 	ib.ptr[2] = lower_32_bits(gpu_addr);
901 	ib.ptr[3] = upper_32_bits(gpu_addr);
902 	ib.ptr[4] = 0xDEADBEEF;
903 	ib.length_dw = 5;
904 
905 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
906 	if (r)
907 		goto err2;
908 
909 	r = dma_fence_wait_timeout(f, false, timeout);
910 	if (r == 0) {
911 		r = -ETIMEDOUT;
912 		goto err2;
913 	} else if (r < 0) {
914 		goto err2;
915 	}
916 
917 	tmp = adev->wb.wb[index];
918 	if (tmp == 0xDEADBEEF)
919 		r = 0;
920 	else
921 		r = -EINVAL;
922 
923 err2:
924 	amdgpu_ib_free(adev, &ib, NULL);
925 	dma_fence_put(f);
926 err1:
927 	amdgpu_device_wb_free(adev, index);
928 	return r;
929 }
930 
931 
932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
933 {
934 	release_firmware(adev->gfx.pfp_fw);
935 	adev->gfx.pfp_fw = NULL;
936 	release_firmware(adev->gfx.me_fw);
937 	adev->gfx.me_fw = NULL;
938 	release_firmware(adev->gfx.ce_fw);
939 	adev->gfx.ce_fw = NULL;
940 	release_firmware(adev->gfx.rlc_fw);
941 	adev->gfx.rlc_fw = NULL;
942 	release_firmware(adev->gfx.mec_fw);
943 	adev->gfx.mec_fw = NULL;
944 	release_firmware(adev->gfx.mec2_fw);
945 	adev->gfx.mec2_fw = NULL;
946 
947 	kfree(adev->gfx.rlc.register_list_format);
948 }
949 
950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
951 {
952 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
953 
954 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
955 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
956 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
957 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
958 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
959 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
960 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
961 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
962 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
963 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
964 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
965 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
966 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
967 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
968 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
969 }
970 
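/*
 * Set me_fw_write_wait / mec_fw_write_wait when the loaded CP gfx (ME/PFP)
 * and compute (MEC) firmware meet the per-ASIC minimum versions required for
 * the write-then-wait register path.
 */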
971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
972 {
973 	adev->gfx.me_fw_write_wait = false;
974 	adev->gfx.mec_fw_write_wait = false;
975 
976 	switch (adev->asic_type) {
977 	case CHIP_VEGA10:
978 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
979 		    (adev->gfx.me_feature_version >= 42) &&
980 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
981 		    (adev->gfx.pfp_feature_version >= 42))
982 			adev->gfx.me_fw_write_wait = true;
983 
984 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
985 		    (adev->gfx.mec_feature_version >= 42))
986 			adev->gfx.mec_fw_write_wait = true;
987 		break;
988 	case CHIP_VEGA12:
989 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
990 		    (adev->gfx.me_feature_version >= 44) &&
991 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
992 		    (adev->gfx.pfp_feature_version >= 44))
993 			adev->gfx.me_fw_write_wait = true;
994 
995 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
996 		    (adev->gfx.mec_feature_version >= 44))
997 			adev->gfx.mec_fw_write_wait = true;
998 		break;
999 	case CHIP_VEGA20:
1000 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1001 		    (adev->gfx.me_feature_version >= 44) &&
1002 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1003 		    (adev->gfx.pfp_feature_version >= 44))
1004 			adev->gfx.me_fw_write_wait = true;
1005 
1006 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1007 		    (adev->gfx.mec_feature_version >= 44))
1008 			adev->gfx.mec_fw_write_wait = true;
1009 		break;
1010 	case CHIP_RAVEN:
1011 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1012 		    (adev->gfx.me_feature_version >= 42) &&
1013 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1014 		    (adev->gfx.pfp_feature_version >= 42))
1015 			adev->gfx.me_fw_write_wait = true;
1016 
1017 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1018 		    (adev->gfx.mec_feature_version >= 42))
1019 			adev->gfx.mec_fw_write_wait = true;
1020 		break;
1021 	default:
1022 		break;
1023 	}
1024 }
1025 
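/*
 * Raven only: leave GFXOFF enabled only when the RLC firmware is recent
 * enough to support it; when GFXOFF stays enabled, the matching GFX
 * powergating flags are set as well.
 */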
1026 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1027 {
1028 	switch (adev->asic_type) {
1029 	case CHIP_VEGA10:
1030 	case CHIP_VEGA12:
1031 	case CHIP_VEGA20:
1032 		break;
1033 	case CHIP_RAVEN:
1034 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1035 			&&((adev->gfx.rlc_fw_version != 106 &&
1036 			     adev->gfx.rlc_fw_version < 531) ||
1037 			    (adev->gfx.rlc_fw_version == 53815) ||
1038 			    (adev->gfx.rlc_feature_version < 1) ||
1039 			    !adev->gfx.rlc.is_rlc_v2_1))
1040 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1041 
1042 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1043 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1044 				AMD_PG_SUPPORT_CP |
1045 				AMD_PG_SUPPORT_RLC_SMU_HS;
1046 		break;
1047 	default:
1048 		break;
1049 	}
1050 }
1051 
1052 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1053 					  const char *chip_name)
1054 {
1055 	char fw_name[30];
1056 	int err;
1057 	struct amdgpu_firmware_info *info = NULL;
1058 	const struct common_firmware_header *header = NULL;
1059 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1060 
1061 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1062 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1063 	if (err)
1064 		goto out;
1065 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1066 	if (err)
1067 		goto out;
1068 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1069 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1070 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1071 
1072 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1073 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1074 	if (err)
1075 		goto out;
1076 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1077 	if (err)
1078 		goto out;
1079 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1080 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1081 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1082 
1083 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1084 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1085 	if (err)
1086 		goto out;
1087 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1088 	if (err)
1089 		goto out;
1090 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1091 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1092 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1093 
1094 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1095 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1096 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1097 		info->fw = adev->gfx.pfp_fw;
1098 		header = (const struct common_firmware_header *)info->fw->data;
1099 		adev->firmware.fw_size +=
1100 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1101 
1102 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1103 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1104 		info->fw = adev->gfx.me_fw;
1105 		header = (const struct common_firmware_header *)info->fw->data;
1106 		adev->firmware.fw_size +=
1107 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1108 
1109 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1110 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1111 		info->fw = adev->gfx.ce_fw;
1112 		header = (const struct common_firmware_header *)info->fw->data;
1113 		adev->firmware.fw_size +=
1114 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1115 	}
1116 
1117 out:
1118 	if (err) {
1119 		dev_err(adev->dev,
1120 			"gfx9: Failed to load firmware \"%s\"\n",
1121 			fw_name);
1122 		release_firmware(adev->gfx.pfp_fw);
1123 		adev->gfx.pfp_fw = NULL;
1124 		release_firmware(adev->gfx.me_fw);
1125 		adev->gfx.me_fw = NULL;
1126 		release_firmware(adev->gfx.ce_fw);
1127 		adev->gfx.ce_fw = NULL;
1128 	}
1129 	return err;
1130 }
1131 
1132 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1133 					  const char *chip_name)
1134 {
1135 	char fw_name[30];
1136 	int err;
1137 	struct amdgpu_firmware_info *info = NULL;
1138 	const struct common_firmware_header *header = NULL;
1139 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1140 	unsigned int *tmp = NULL;
1141 	unsigned int i = 0;
1142 	uint16_t version_major;
1143 	uint16_t version_minor;
1144 	uint32_t smu_version;
1145 
1146 	/*
1147 	 * For Picasso on an AM4-socket board, use picasso_rlc_am4.bin
1148 	 * instead of picasso_rlc.bin.
1149 	 * How to tell the two apart:
1150 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1151 	 *          or revision >= 0xD8 && revision <= 0xDF
1152 	 * otherwise it is PCO FP5.
1153 	 */
1154 	if (!strcmp(chip_name, "picasso") &&
1155 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1156 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1157 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1158 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1159 		(smu_version >= 0x41e2b))
1160 		/*
1161 		 * SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
1162 		 */
1163 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1164 	else
1165 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1166 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1167 	if (err)
1168 		goto out;
1169 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1170 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1171 
1172 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1173 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1174 	if (version_major == 2 && version_minor == 1)
1175 		adev->gfx.rlc.is_rlc_v2_1 = true;
1176 
1177 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1178 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1179 	adev->gfx.rlc.save_and_restore_offset =
1180 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1181 	adev->gfx.rlc.clear_state_descriptor_offset =
1182 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1183 	adev->gfx.rlc.avail_scratch_ram_locations =
1184 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1185 	adev->gfx.rlc.reg_restore_list_size =
1186 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1187 	adev->gfx.rlc.reg_list_format_start =
1188 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1189 	adev->gfx.rlc.reg_list_format_separate_start =
1190 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1191 	adev->gfx.rlc.starting_offsets_start =
1192 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1193 	adev->gfx.rlc.reg_list_format_size_bytes =
1194 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1195 	adev->gfx.rlc.reg_list_size_bytes =
1196 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1197 	adev->gfx.rlc.register_list_format =
1198 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1199 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1200 	if (!adev->gfx.rlc.register_list_format) {
1201 		err = -ENOMEM;
1202 		goto out;
1203 	}
1204 
1205 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1206 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1207 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1208 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1209 
1210 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1211 
1212 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1213 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1214 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1215 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1216 
1217 	if (adev->gfx.rlc.is_rlc_v2_1)
1218 		gfx_v9_0_init_rlc_ext_microcode(adev);
1219 
1220 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1221 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1222 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1223 		info->fw = adev->gfx.rlc_fw;
1224 		header = (const struct common_firmware_header *)info->fw->data;
1225 		adev->firmware.fw_size +=
1226 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1227 
1228 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1229 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1230 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1231 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1232 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1233 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1234 			info->fw = adev->gfx.rlc_fw;
1235 			adev->firmware.fw_size +=
1236 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1237 
1238 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1239 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1240 			info->fw = adev->gfx.rlc_fw;
1241 			adev->firmware.fw_size +=
1242 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1243 
1244 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1245 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1246 			info->fw = adev->gfx.rlc_fw;
1247 			adev->firmware.fw_size +=
1248 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1249 		}
1250 	}
1251 
1252 out:
1253 	if (err) {
1254 		dev_err(adev->dev,
1255 			"gfx9: Failed to load firmware \"%s\"\n",
1256 			fw_name);
1257 		release_firmware(adev->gfx.rlc_fw);
1258 		adev->gfx.rlc_fw = NULL;
1259 	}
1260 	return err;
1261 }
1262 
1263 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1264 					  const char *chip_name)
1265 {
1266 	char fw_name[30];
1267 	int err;
1268 	struct amdgpu_firmware_info *info = NULL;
1269 	const struct common_firmware_header *header = NULL;
1270 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1271 
1272 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1273 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1274 	if (err)
1275 		goto out;
1276 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1277 	if (err)
1278 		goto out;
1279 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1280 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1281 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1282 
1283 
1284 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1285 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1286 	if (!err) {
1287 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1288 		if (err)
1289 			goto out;
1290 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1291 		adev->gfx.mec2_fw->data;
1292 		adev->gfx.mec2_fw_version =
1293 		le32_to_cpu(cp_hdr->header.ucode_version);
1294 		adev->gfx.mec2_feature_version =
1295 		le32_to_cpu(cp_hdr->ucode_feature_version);
1296 	} else {
1297 		err = 0;
1298 		adev->gfx.mec2_fw = NULL;
1299 	}
1300 
1301 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1302 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1303 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1304 		info->fw = adev->gfx.mec_fw;
1305 		header = (const struct common_firmware_header *)info->fw->data;
1306 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1307 		adev->firmware.fw_size +=
1308 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1309 
1310 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1311 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1312 		info->fw = adev->gfx.mec_fw;
1313 		adev->firmware.fw_size +=
1314 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1315 
1316 		if (adev->gfx.mec2_fw) {
1317 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1318 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1319 			info->fw = adev->gfx.mec2_fw;
1320 			header = (const struct common_firmware_header *)info->fw->data;
1321 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1322 			adev->firmware.fw_size +=
1323 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1324 
1325 			/* TODO: Determine if MEC2 JT FW loading can be removed
1326 			 * for all GFX V9 ASICs and above. */
1327 			if (adev->asic_type != CHIP_ARCTURUS) {
1328 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1329 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1330 				info->fw = adev->gfx.mec2_fw;
1331 				adev->firmware.fw_size +=
1332 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1333 					PAGE_SIZE);
1334 			}
1335 		}
1336 	}
1337 
1338 out:
1339 	gfx_v9_0_check_if_need_gfxoff(adev);
1340 	gfx_v9_0_check_fw_write_wait(adev);
1341 	if (err) {
1342 		dev_err(adev->dev,
1343 			"gfx9: Failed to load firmware \"%s\"\n",
1344 			fw_name);
1345 		release_firmware(adev->gfx.mec_fw);
1346 		adev->gfx.mec_fw = NULL;
1347 		release_firmware(adev->gfx.mec2_fw);
1348 		adev->gfx.mec2_fw = NULL;
1349 	}
1350 	return err;
1351 }
1352 
1353 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1354 {
1355 	const char *chip_name;
1356 	int r;
1357 
1358 	DRM_DEBUG("\n");
1359 
1360 	switch (adev->asic_type) {
1361 	case CHIP_VEGA10:
1362 		chip_name = "vega10";
1363 		break;
1364 	case CHIP_VEGA12:
1365 		chip_name = "vega12";
1366 		break;
1367 	case CHIP_VEGA20:
1368 		chip_name = "vega20";
1369 		break;
1370 	case CHIP_RAVEN:
1371 		if (adev->rev_id >= 8)
1372 			chip_name = "raven2";
1373 		else if (adev->pdev->device == 0x15d8)
1374 			chip_name = "picasso";
1375 		else
1376 			chip_name = "raven";
1377 		break;
1378 	case CHIP_ARCTURUS:
1379 		chip_name = "arcturus";
1380 		break;
1381 	case CHIP_RENOIR:
1382 		chip_name = "renoir";
1383 		break;
1384 	default:
1385 		BUG();
1386 	}
1387 
1388 	/* No CPG in Arcturus */
1389 	if (adev->asic_type != CHIP_ARCTURUS) {
1390 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1391 		if (r)
1392 			return r;
1393 	}
1394 
1395 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1396 	if (r)
1397 		return r;
1398 
1399 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1400 	if (r)
1401 		return r;
1402 
1403 	return r;
1404 }
1405 
1406 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1407 {
1408 	u32 count = 0;
1409 	const struct cs_section_def *sect = NULL;
1410 	const struct cs_extent_def *ext = NULL;
1411 
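	/*
	 * Count the dwords emitted by gfx_v9_0_get_csb_buffer(): the PREAMBLE
	 * begin/end and CLEAR_STATE packets are 2 dwords each, CONTEXT_CONTROL
	 * is 3, and each SET_CONTEXT_REG extent costs 2 dwords of header plus
	 * its register payload.
	 */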
1412 	/* begin clear state */
1413 	count += 2;
1414 	/* context control state */
1415 	count += 3;
1416 
1417 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1418 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1419 			if (sect->id == SECT_CONTEXT)
1420 				count += 2 + ext->reg_count;
1421 			else
1422 				return 0;
1423 		}
1424 	}
1425 
1426 	/* end clear state */
1427 	count += 2;
1428 	/* clear state */
1429 	count += 2;
1430 
1431 	return count;
1432 }
1433 
1434 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1435 				    volatile u32 *buffer)
1436 {
1437 	u32 count = 0, i;
1438 	const struct cs_section_def *sect = NULL;
1439 	const struct cs_extent_def *ext = NULL;
1440 
1441 	if (adev->gfx.rlc.cs_data == NULL)
1442 		return;
1443 	if (buffer == NULL)
1444 		return;
1445 
1446 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1447 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1448 
1449 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1450 	buffer[count++] = cpu_to_le32(0x80000000);
1451 	buffer[count++] = cpu_to_le32(0x80000000);
1452 
1453 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1454 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1455 			if (sect->id == SECT_CONTEXT) {
1456 				buffer[count++] =
1457 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1458 				buffer[count++] = cpu_to_le32(ext->reg_index -
1459 						PACKET3_SET_CONTEXT_REG_START);
1460 				for (i = 0; i < ext->reg_count; i++)
1461 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1462 			} else {
1463 				return;
1464 			}
1465 		}
1466 	}
1467 
1468 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1469 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1470 
1471 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1472 	buffer[count++] = cpu_to_le32(0);
1473 }
1474 
1475 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1476 {
1477 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1478 	uint32_t pg_always_on_cu_num = 2;
1479 	uint32_t always_on_cu_num;
1480 	uint32_t i, j, k;
1481 	uint32_t mask, cu_bitmap, counter;
1482 
1483 	if (adev->flags & AMD_IS_APU)
1484 		always_on_cu_num = 4;
1485 	else if (adev->asic_type == CHIP_VEGA12)
1486 		always_on_cu_num = 8;
1487 	else
1488 		always_on_cu_num = 12;
1489 
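	/*
	 * Walk the CU bitmap per SE/SH: the mask of the first
	 * pg_always_on_cu_num enabled CUs is written to
	 * RLC_PG_ALWAYS_ON_CU_MASK, while the first always_on_cu_num enabled
	 * CUs form RLC_LB_ALWAYS_ACTIVE_CU_MASK.
	 */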
1490 	mutex_lock(&adev->grbm_idx_mutex);
1491 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1492 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1493 			mask = 1;
1494 			cu_bitmap = 0;
1495 			counter = 0;
1496 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1497 
1498 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1499 				if (cu_info->bitmap[i][j] & mask) {
1500 					if (counter == pg_always_on_cu_num)
1501 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1502 					if (counter < always_on_cu_num)
1503 						cu_bitmap |= mask;
1504 					else
1505 						break;
1506 					counter++;
1507 				}
1508 				mask <<= 1;
1509 			}
1510 
1511 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1512 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1513 		}
1514 	}
1515 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1516 	mutex_unlock(&adev->grbm_idx_mutex);
1517 }
1518 
1519 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1520 {
1521 	uint32_t data;
1522 
1523 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1524 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1525 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1526 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1527 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1528 
1529 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1530 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1531 
1532 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1533 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1534 
1535 	mutex_lock(&adev->grbm_idx_mutex);
1536 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1537 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1538 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1539 
1540 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1541 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1542 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1543 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1544 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1545 
1546 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1547 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1548 	data &= 0x0000FFFF;
1549 	data |= 0x00C00000;
1550 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1551 
1552 	/*
1553 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1554 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1555 	 */
1556 
1557 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1558 	 * but is used here for RLC_LB_CNTL configuration */
1559 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1560 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1561 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1562 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1563 	mutex_unlock(&adev->grbm_idx_mutex);
1564 
1565 	gfx_v9_0_init_always_on_cu_mask(adev);
1566 }
1567 
1568 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1569 {
1570 	uint32_t data;
1571 
1572 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1573 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1574 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1575 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1576 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1577 
1578 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1579 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1580 
1581 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1582 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1583 
1584 	mutex_lock(&adev->grbm_idx_mutex);
1585 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1586 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1587 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1588 
1589 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1590 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1591 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1592 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1593 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1594 
1595 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1596 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1597 	data &= 0x0000FFFF;
1598 	data |= 0x00C00000;
1599 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1600 
1601 	/*
1602 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1603 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1604 	 */
1605 
1606 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1607 	 * but is used here for RLC_LB_CNTL configuration */
1608 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1609 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1610 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1611 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1612 	mutex_unlock(&adev->grbm_idx_mutex);
1613 
1614 	gfx_v9_0_init_always_on_cu_mask(adev);
1615 }
1616 
1617 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1618 {
1619 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1620 }
1621 
1622 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1623 {
1624 	return 5;
1625 }
1626 
1627 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1628 {
1629 	const struct cs_section_def *cs_data;
1630 	int r;
1631 
1632 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1633 
1634 	cs_data = adev->gfx.rlc.cs_data;
1635 
1636 	if (cs_data) {
1637 		/* init clear state block */
1638 		r = amdgpu_gfx_rlc_init_csb(adev);
1639 		if (r)
1640 			return r;
1641 	}
1642 
1643 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1644 		/* TODO: double check the cp_table_size for RV */
1645 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1646 		r = amdgpu_gfx_rlc_init_cpt(adev);
1647 		if (r)
1648 			return r;
1649 	}
1650 
1651 	switch (adev->asic_type) {
1652 	case CHIP_RAVEN:
1653 	case CHIP_RENOIR:
1654 		gfx_v9_0_init_lbpw(adev);
1655 		break;
1656 	case CHIP_VEGA20:
1657 		gfx_v9_4_init_lbpw(adev);
1658 		break;
1659 	default:
1660 		break;
1661 	}
1662 
1663 	return 0;
1664 }
1665 
1666 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1667 {
1668 	int r;
1669 
1670 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1671 	if (unlikely(r != 0))
1672 		return r;
1673 
1674 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1675 			AMDGPU_GEM_DOMAIN_VRAM);
1676 	if (!r)
1677 		adev->gfx.rlc.clear_state_gpu_addr =
1678 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1679 
1680 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1681 
1682 	return r;
1683 }
1684 
1685 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1686 {
1687 	int r;
1688 
1689 	if (!adev->gfx.rlc.clear_state_obj)
1690 		return;
1691 
1692 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1693 	if (likely(r == 0)) {
1694 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1695 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1696 	}
1697 }
1698 
1699 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1700 {
1701 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1702 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1703 }
1704 
1705 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1706 {
1707 	int r;
1708 	u32 *hpd;
1709 	const __le32 *fw_data;
1710 	unsigned fw_size;
1711 	u32 *fw;
1712 	size_t mec_hpd_size;
1713 
1714 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1715 
1716 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1717 
1718 	/* take ownership of the relevant compute queues */
1719 	amdgpu_gfx_compute_queue_acquire(adev);
1720 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1721 
1722 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1723 				      AMDGPU_GEM_DOMAIN_VRAM,
1724 				      &adev->gfx.mec.hpd_eop_obj,
1725 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1726 				      (void **)&hpd);
1727 	if (r) {
1728 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1729 		gfx_v9_0_mec_fini(adev);
1730 		return r;
1731 	}
1732 
1733 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1734 
1735 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1736 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1737 
1738 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1739 
1740 	fw_data = (const __le32 *)
1741 		(adev->gfx.mec_fw->data +
1742 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1743 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1744 
1745 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1746 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1747 				      &adev->gfx.mec.mec_fw_obj,
1748 				      &adev->gfx.mec.mec_fw_gpu_addr,
1749 				      (void **)&fw);
1750 	if (r) {
1751 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1752 		gfx_v9_0_mec_fini(adev);
1753 		return r;
1754 	}
1755 
1756 	memcpy(fw, fw_data, fw_size);
1757 
1758 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1759 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1760 
1761 	return 0;
1762 }
1763 
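/*
 * Wave state is read through the SQ indirect register interface: program
 * SQ_IND_INDEX with the target SIMD/wave/register, then read SQ_IND_DATA
 * (with AUTO_INCR for consecutive SGPR/VGPR reads).
 */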
1764 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1765 {
1766 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1767 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1768 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1769 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1770 		(SQ_IND_INDEX__FORCE_READ_MASK));
1771 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1772 }
1773 
1774 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1775 			   uint32_t wave, uint32_t thread,
1776 			   uint32_t regno, uint32_t num, uint32_t *out)
1777 {
1778 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1779 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1780 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1781 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1782 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1783 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1784 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1785 	while (num--)
1786 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1787 }
1788 
1789 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1790 {
1791 	/* type 1 wave data */
1792 	dst[(*no_fields)++] = 1;
1793 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1794 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1795 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1796 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1797 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1798 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1799 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1800 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1801 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1802 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1803 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1804 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1805 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1806 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1807 }
1808 
1809 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1810 				     uint32_t wave, uint32_t start,
1811 				     uint32_t size, uint32_t *dst)
1812 {
1813 	wave_read_regs(
1814 		adev, simd, wave, 0,
1815 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1816 }
1817 
1818 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1819 				     uint32_t wave, uint32_t thread,
1820 				     uint32_t start, uint32_t size,
1821 				     uint32_t *dst)
1822 {
1823 	wave_read_regs(
1824 		adev, simd, wave, thread,
1825 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1826 }
1827 
1828 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1829 				  u32 me, u32 pipe, u32 q, u32 vm)
1830 {
1831 	soc15_grbm_select(adev, me, pipe, q, vm);
1832 }
1833 
1834 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1835 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1836 	.select_se_sh = &gfx_v9_0_select_se_sh,
1837 	.read_wave_data = &gfx_v9_0_read_wave_data,
1838 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1839 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1840 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1841 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1842 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1843 };
1844 
1845 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1846 {
1847 	u32 gb_addr_config;
1848 	int err;
1849 
1850 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1851 
1852 	switch (adev->asic_type) {
1853 	case CHIP_VEGA10:
1854 		adev->gfx.config.max_hw_contexts = 8;
1855 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1856 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1857 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1858 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1859 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1860 		break;
1861 	case CHIP_VEGA12:
1862 		adev->gfx.config.max_hw_contexts = 8;
1863 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1864 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1865 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1866 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1867 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1868 		DRM_INFO("fix gfx.config for vega12\n");
1869 		break;
1870 	case CHIP_VEGA20:
1871 		adev->gfx.config.max_hw_contexts = 8;
1872 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1873 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1874 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1875 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1876 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1877 		gb_addr_config &= ~0xf3e777ff;
1878 		gb_addr_config |= 0x22014042;
1879 		/* check vbios table if gpu info is not available */
1880 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1881 		if (err)
1882 			return err;
1883 		break;
1884 	case CHIP_RAVEN:
1885 		adev->gfx.config.max_hw_contexts = 8;
1886 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1887 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1888 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1889 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1890 		if (adev->rev_id >= 8)
1891 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1892 		else
1893 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1894 		break;
1895 	case CHIP_ARCTURUS:
1896 		adev->gfx.config.max_hw_contexts = 8;
1897 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1898 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1899 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1900 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1901 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1902 		gb_addr_config &= ~0xf3e777ff;
1903 		gb_addr_config |= 0x22014042;
1904 		break;
1905 	case CHIP_RENOIR:
1906 		adev->gfx.config.max_hw_contexts = 8;
1907 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1908 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1909 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1910 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1911 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1912 		gb_addr_config &= ~0xf3e777ff;
1913 		gb_addr_config |= 0x22010042;
1914 		break;
1915 	default:
1916 		BUG();
1917 		break;
1918 	}
1919 
1920 	adev->gfx.config.gb_addr_config = gb_addr_config;
1921 
1922 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1923 			REG_GET_FIELD(
1924 					adev->gfx.config.gb_addr_config,
1925 					GB_ADDR_CONFIG,
1926 					NUM_PIPES);
1927 
1928 	adev->gfx.config.max_tile_pipes =
1929 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1930 
1931 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1932 			REG_GET_FIELD(
1933 					adev->gfx.config.gb_addr_config,
1934 					GB_ADDR_CONFIG,
1935 					NUM_BANKS);
1936 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1937 			REG_GET_FIELD(
1938 					adev->gfx.config.gb_addr_config,
1939 					GB_ADDR_CONFIG,
1940 					MAX_COMPRESSED_FRAGS);
1941 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1942 			REG_GET_FIELD(
1943 					adev->gfx.config.gb_addr_config,
1944 					GB_ADDR_CONFIG,
1945 					NUM_RB_PER_SE);
1946 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1947 			REG_GET_FIELD(
1948 					adev->gfx.config.gb_addr_config,
1949 					GB_ADDR_CONFIG,
1950 					NUM_SHADER_ENGINES);
1951 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1952 			REG_GET_FIELD(
1953 					adev->gfx.config.gb_addr_config,
1954 					GB_ADDR_CONFIG,
1955 					PIPE_INTERLEAVE_SIZE));
1956 
1957 	return 0;
1958 }
1959 
1960 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1961 				   struct amdgpu_ngg_buf *ngg_buf,
1962 				   int size_se,
1963 				   int default_size_se)
1964 {
1965 	int r;
1966 
1967 	if (size_se < 0) {
1968 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1969 		return -EINVAL;
1970 	}
1971 	size_se = size_se ? size_se : default_size_se;
1972 
1973 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1974 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1975 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1976 				    &ngg_buf->bo,
1977 				    &ngg_buf->gpu_addr,
1978 				    NULL);
1979 	if (r) {
1980 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1981 		return r;
1982 	}
1983 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1984 
1985 	return r;
1986 }
1987 
1988 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1989 {
1990 	int i;
1991 
1992 	for (i = 0; i < NGG_BUF_MAX; i++)
1993 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1994 				      &adev->gfx.ngg.buf[i].gpu_addr,
1995 				      NULL);
1996 
1997 	memset(&adev->gfx.ngg.buf[0], 0,
1998 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1999 
2000 	adev->gfx.ngg.init = false;
2001 
2002 	return 0;
2003 }
2004 
2005 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2006 {
2007 	int r;
2008 
2009 	if (!amdgpu_ngg || adev->gfx.ngg.init)
2010 		return 0;
2011 
2012 	/* GDS reserved memory: 64-byte alignment */
2013 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2014 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2015 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2016 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2017 
2018 	/* Primitive Buffer */
2019 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2020 				    amdgpu_prim_buf_per_se,
2021 				    64 * 1024);
2022 	if (r) {
2023 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2024 		goto err;
2025 	}
2026 
2027 	/* Position Buffer */
2028 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2029 				    amdgpu_pos_buf_per_se,
2030 				    256 * 1024);
2031 	if (r) {
2032 		dev_err(adev->dev, "Failed to create Position Buffer\n");
2033 		goto err;
2034 	}
2035 
2036 	/* Control Sideband */
2037 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2038 				    amdgpu_cntl_sb_buf_per_se,
2039 				    256);
2040 	if (r) {
2041 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2042 		goto err;
2043 	}
2044 
2045 	/* Parameter Cache, not created by default */
2046 	if (amdgpu_param_buf_per_se <= 0)
2047 		goto out;
2048 
2049 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2050 				    amdgpu_param_buf_per_se,
2051 				    512 * 1024);
2052 	if (r) {
2053 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2054 		goto err;
2055 	}
2056 
2057 out:
2058 	adev->gfx.ngg.init = true;
2059 	return 0;
2060 err:
2061 	gfx_v9_0_ngg_fini(adev);
2062 	return r;
2063 }
2064 
2065 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2066 {
2067 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2068 	int r;
2069 	u32 data, base;
2070 
2071 	if (!amdgpu_ngg)
2072 		return 0;
2073 
2074 	/* Program buffer size */
2075 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2076 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2077 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2078 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2079 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2080 
2081 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2082 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2083 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2084 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2085 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2086 
2087 	/* Program buffer base address */
2088 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2089 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2090 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2091 
2092 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2093 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2094 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2095 
2096 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2097 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2098 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2099 
2100 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2101 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2102 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2103 
2104 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2105 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2106 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2107 
2108 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2109 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2110 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2111 
2112 	/* Clear GDS reserved memory */
2113 	r = amdgpu_ring_alloc(ring, 17);
2114 	if (r) {
2115 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2116 			  ring->name, r);
2117 		return r;
2118 	}
2119 
2120 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2121 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2122 			           (adev->gds.gds_size +
2123 				    adev->gfx.ngg.gds_reserve_size));
2124 
2125 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2126 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2127 				PACKET3_DMA_DATA_DST_SEL(1) |
2128 				PACKET3_DMA_DATA_SRC_SEL(2)));
2129 	amdgpu_ring_write(ring, 0);
2130 	amdgpu_ring_write(ring, 0);
2131 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2132 	amdgpu_ring_write(ring, 0);
2133 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2134 				adev->gfx.ngg.gds_reserve_size);
2135 
2136 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2137 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2138 
2139 	amdgpu_ring_commit(ring);
2140 
2141 	return 0;
2142 }
2143 
2144 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2145 				      int mec, int pipe, int queue)
2146 {
2147 	int r;
2148 	unsigned irq_type;
2149 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2150 
2151 	ring = &adev->gfx.compute_ring[ring_id];
2152 
2153 	/* mec0 is me1 */
2154 	ring->me = mec + 1;
2155 	ring->pipe = pipe;
2156 	ring->queue = queue;
2157 
2158 	ring->ring_obj = NULL;
2159 	ring->use_doorbell = true;
2160 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2161 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2162 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2163 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2164 
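	/*
	 * EOP interrupt sources are laid out per MEC pipe starting at
	 * MEC1_PIPE0_EOP, so index by (me - 1) * num_pipe_per_mec + pipe.
	 */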
2165 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2166 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2167 		+ ring->pipe;
2168 
2169 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2170 	r = amdgpu_ring_init(adev, ring, 1024,
2171 			     &adev->gfx.eop_irq, irq_type);
2172 	if (r)
2173 		return r;
2174 
2175 
2176 	return 0;
2177 }
2178 
2179 static int gfx_v9_0_sw_init(void *handle)
2180 {
2181 	int i, j, k, r, ring_id;
2182 	struct amdgpu_ring *ring;
2183 	struct amdgpu_kiq *kiq;
2184 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2185 
2186 	switch (adev->asic_type) {
2187 	case CHIP_VEGA10:
2188 	case CHIP_VEGA12:
2189 	case CHIP_VEGA20:
2190 	case CHIP_RAVEN:
2191 	case CHIP_ARCTURUS:
2192 	case CHIP_RENOIR:
2193 		adev->gfx.mec.num_mec = 2;
2194 		break;
2195 	default:
2196 		adev->gfx.mec.num_mec = 1;
2197 		break;
2198 	}
2199 
2200 	adev->gfx.mec.num_pipe_per_mec = 4;
2201 	adev->gfx.mec.num_queue_per_pipe = 8;
2202 
2203 	/* EOP Event */
2204 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2205 	if (r)
2206 		return r;
2207 
2208 	/* Privileged reg */
2209 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2210 			      &adev->gfx.priv_reg_irq);
2211 	if (r)
2212 		return r;
2213 
2214 	/* Privileged inst */
2215 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2216 			      &adev->gfx.priv_inst_irq);
2217 	if (r)
2218 		return r;
2219 
2220 	/* ECC error */
2221 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2222 			      &adev->gfx.cp_ecc_error_irq);
2223 	if (r)
2224 		return r;
2225 
2226 	/* FUE error */
2227 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2228 			      &adev->gfx.cp_ecc_error_irq);
2229 	if (r)
2230 		return r;
2231 
2232 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2233 
2234 	gfx_v9_0_scratch_init(adev);
2235 
2236 	r = gfx_v9_0_init_microcode(adev);
2237 	if (r) {
2238 		DRM_ERROR("Failed to load gfx firmware!\n");
2239 		return r;
2240 	}
2241 
2242 	r = adev->gfx.rlc.funcs->init(adev);
2243 	if (r) {
2244 		DRM_ERROR("Failed to init rlc BOs!\n");
2245 		return r;
2246 	}
2247 
2248 	r = gfx_v9_0_mec_init(adev);
2249 	if (r) {
2250 		DRM_ERROR("Failed to init MEC BOs!\n");
2251 		return r;
2252 	}
2253 
2254 	/* set up the gfx ring */
2255 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2256 		ring = &adev->gfx.gfx_ring[i];
2257 		ring->ring_obj = NULL;
2258 		if (!i)
2259 			sprintf(ring->name, "gfx");
2260 		else
2261 			sprintf(ring->name, "gfx_%d", i);
2262 		ring->use_doorbell = true;
2263 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2264 		r = amdgpu_ring_init(adev, ring, 1024,
2265 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2266 		if (r)
2267 			return r;
2268 	}
2269 
2270 	/* set up the compute queues - allocate horizontally across pipes */
2271 	ring_id = 0;
2272 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2273 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2274 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2275 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2276 					continue;
2277 
2278 				r = gfx_v9_0_compute_ring_init(adev,
2279 							       ring_id,
2280 							       i, k, j);
2281 				if (r)
2282 					return r;
2283 
2284 				ring_id++;
2285 			}
2286 		}
2287 	}
2288 
2289 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2290 	if (r) {
2291 		DRM_ERROR("Failed to init KIQ BOs!\n");
2292 		return r;
2293 	}
2294 
2295 	kiq = &adev->gfx.kiq;
2296 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2297 	if (r)
2298 		return r;
2299 
2300 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2301 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2302 	if (r)
2303 		return r;
2304 
2305 	adev->gfx.ce_ram_size = 0x8000;
2306 
2307 	r = gfx_v9_0_gpu_early_init(adev);
2308 	if (r)
2309 		return r;
2310 
2311 	r = gfx_v9_0_ngg_init(adev);
2312 	if (r)
2313 		return r;
2314 
2315 	return 0;
2316 }
2317 
2318 
2319 static int gfx_v9_0_sw_fini(void *handle)
2320 {
2321 	int i;
2322 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2323 
2324 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2325 			adev->gfx.ras_if) {
2326 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2327 		struct ras_ih_if ih_info = {
2328 			.head = *ras_if,
2329 		};
2330 
2331 		amdgpu_ras_debugfs_remove(adev, ras_if);
2332 		amdgpu_ras_sysfs_remove(adev, ras_if);
2333 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
2334 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2335 		kfree(ras_if);
2336 	}
2337 
2338 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2339 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2340 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2341 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2342 
2343 	amdgpu_gfx_mqd_sw_fini(adev);
2344 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2345 	amdgpu_gfx_kiq_fini(adev);
2346 
2347 	gfx_v9_0_mec_fini(adev);
2348 	gfx_v9_0_ngg_fini(adev);
2349 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2350 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2351 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2352 				&adev->gfx.rlc.cp_table_gpu_addr,
2353 				(void **)&adev->gfx.rlc.cp_table_ptr);
2354 	}
2355 	gfx_v9_0_free_microcode(adev);
2356 
2357 	return 0;
2358 }
2359 
2360 
2361 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2362 {
2363 	/* TODO */
2364 }
2365 
2366 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2367 {
2368 	u32 data;
2369 
2370 	if (instance == 0xffffffff)
2371 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2372 	else
2373 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2374 
2375 	if (se_num == 0xffffffff)
2376 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2377 	else
2378 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2379 
2380 	if (sh_num == 0xffffffff)
2381 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2382 	else
2383 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2384 
2385 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2386 }
2387 
2388 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2389 {
2390 	u32 data, mask;
2391 
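	/*
	 * Combine the hardware (CC) and user (GC_USER) RB disable masks for
	 * the currently selected SE/SH and invert to get the active RBs.
	 */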
2392 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2393 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2394 
2395 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2396 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2397 
2398 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2399 					 adev->gfx.config.max_sh_per_se);
2400 
2401 	return (~data) & mask;
2402 }
2403 
2404 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2405 {
2406 	int i, j;
2407 	u32 data;
2408 	u32 active_rbs = 0;
2409 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2410 					adev->gfx.config.max_sh_per_se;
2411 
2412 	mutex_lock(&adev->grbm_idx_mutex);
2413 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2414 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2415 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2416 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2417 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2418 					       rb_bitmap_width_per_sh);
2419 		}
2420 	}
2421 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2422 	mutex_unlock(&adev->grbm_idx_mutex);
2423 
2424 	adev->gfx.config.backend_enable_mask = active_rbs;
2425 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2426 }
2427 
2428 #define DEFAULT_SH_MEM_BASES	(0x6000)
2429 #define FIRST_COMPUTE_VMID	(8)
2430 #define LAST_COMPUTE_VMID	(16)
2431 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2432 {
2433 	int i;
2434 	uint32_t sh_mem_config;
2435 	uint32_t sh_mem_bases;
2436 
2437 	/*
2438 	 * Configure apertures:
2439 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2440 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2441 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2442 	 */
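	/*
	 * SH_MEM_BASES holds bits [63:48] of the private and shared aperture
	 * bases (see the >> 48 in gfx_v9_0_constants_init()), so 0x6000 maps
	 * to the 0x6000'0000'00000000 range described above.
	 */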
2443 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2444 
2445 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2446 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2447 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2448 
2449 	mutex_lock(&adev->srbm_mutex);
2450 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2451 		soc15_grbm_select(adev, 0, 0, 0, i);
2452 		/* CP and shaders */
2453 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2454 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2455 	}
2456 	soc15_grbm_select(adev, 0, 0, 0, 0);
2457 	mutex_unlock(&adev->srbm_mutex);
2458 
2459 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2460 	 * access. These should be enabled by FW for target VMIDs. */
2461 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2462 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2463 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2464 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2465 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2466 	}
2467 }
2468 
2469 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2470 {
2471 	int vmid;
2472 
2473 	/*
2474 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2475 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2476 	 * the driver can enable them for graphics. VMID0 should maintain
2477 	 * access so that HWS firmware can save/restore entries.
2478 	 */
2479 	for (vmid = 1; vmid < 16; vmid++) {
2480 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2481 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2482 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2483 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2484 	}
2485 }
2486 
2487 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2488 {
2489 	u32 tmp;
2490 	int i;
2491 
2492 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2493 
2494 	gfx_v9_0_tiling_mode_table_init(adev);
2495 
2496 	gfx_v9_0_setup_rb(adev);
2497 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2498 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2499 
2500 	/* XXX SH_MEM regs */
2501 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2502 	mutex_lock(&adev->srbm_mutex);
2503 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2504 		soc15_grbm_select(adev, 0, 0, 0, i);
2505 		/* CP and shaders */
2506 		if (i == 0) {
2507 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2508 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2509 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2510 					    !!amdgpu_noretry);
2511 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2512 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2513 		} else {
2514 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2515 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2516 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2517 					    !!amdgpu_noretry);
2518 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2519 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2520 				(adev->gmc.private_aperture_start >> 48));
2521 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2522 				(adev->gmc.shared_aperture_start >> 48));
2523 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2524 		}
2525 	}
2526 	soc15_grbm_select(adev, 0, 0, 0, 0);
2527 
2528 	mutex_unlock(&adev->srbm_mutex);
2529 
2530 	gfx_v9_0_init_compute_vmid(adev);
2531 	gfx_v9_0_init_gds_vmid(adev);
2532 }
2533 
2534 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2535 {
2536 	u32 i, j, k;
2537 	u32 mask;
2538 
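	/*
	 * Poll the per-SE/SH CU serdes masters first, then the non-CU masters
	 * (SE/GC/TC0/TC1), giving each up to usec_timeout microseconds.
	 */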
2539 	mutex_lock(&adev->grbm_idx_mutex);
2540 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2541 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2542 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2543 			for (k = 0; k < adev->usec_timeout; k++) {
2544 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2545 					break;
2546 				udelay(1);
2547 			}
2548 			if (k == adev->usec_timeout) {
2549 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2550 						      0xffffffff, 0xffffffff);
2551 				mutex_unlock(&adev->grbm_idx_mutex);
2552 				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2553 					 i, j);
2554 				return;
2555 			}
2556 		}
2557 	}
2558 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2559 	mutex_unlock(&adev->grbm_idx_mutex);
2560 
2561 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2562 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2563 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2564 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2565 	for (k = 0; k < adev->usec_timeout; k++) {
2566 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2567 			break;
2568 		udelay(1);
2569 	}
2570 }
2571 
2572 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2573 					       bool enable)
2574 {
2575 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2576 
2577 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2578 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2579 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2580 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2581 
2582 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2583 }
2584 
2585 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2586 {
2587 	/* csib */
2588 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2589 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2590 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2591 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2592 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2593 			adev->gfx.rlc.clear_state_size);
2594 }
2595 
2596 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2597 				int indirect_offset,
2598 				int list_size,
2599 				int *unique_indirect_regs,
2600 				int unique_indirect_reg_count,
2601 				int *indirect_start_offsets,
2602 				int *indirect_start_offsets_count,
2603 				int max_start_offsets_count)
2604 {
2605 	int idx;
2606 
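	/*
	 * register_list_format starts with direct register entries, followed
	 * by indirect blocks: each indirect entry spans three dwords (the
	 * third being the indirect register offset) and a block is terminated
	 * by 0xFFFFFFFF. Record each block's start offset and collect the
	 * unique indirect register offsets.
	 */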
2607 	for (; indirect_offset < list_size; indirect_offset++) {
2608 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2609 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2610 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2611 
2612 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2613 			indirect_offset += 2;
2614 
2615 			/* look for the matching index */
2616 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2617 				if (unique_indirect_regs[idx] ==
2618 					register_list_format[indirect_offset] ||
2619 					!unique_indirect_regs[idx])
2620 					break;
2621 			}
2622 
2623 			BUG_ON(idx >= unique_indirect_reg_count);
2624 
2625 			if (!unique_indirect_regs[idx])
2626 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2627 
2628 			indirect_offset++;
2629 		}
2630 	}
2631 }
2632 
2633 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2634 {
2635 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2636 	int unique_indirect_reg_count = 0;
2637 
2638 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2639 	int indirect_start_offsets_count = 0;
2640 
2641 	int list_size = 0;
2642 	int i = 0, j = 0;
2643 	u32 tmp = 0;
2644 
2645 	u32 *register_list_format =
2646 		kmemdup(adev->gfx.rlc.register_list_format,
2647 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2648 	if (!register_list_format)
2649 		return -ENOMEM;
2650 
2651 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2652 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2653 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2654 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2655 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2656 				    unique_indirect_regs,
2657 				    unique_indirect_reg_count,
2658 				    indirect_start_offsets,
2659 				    &indirect_start_offsets_count,
2660 				    ARRAY_SIZE(indirect_start_offsets));
2661 
2662 	/* enable auto inc in case it is disabled */
2663 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2664 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2665 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2666 
2667 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2668 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2669 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2670 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2671 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2672 			adev->gfx.rlc.register_restore[i]);
2673 
2674 	/* load indirect register */
2675 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2676 		adev->gfx.rlc.reg_list_format_start);
2677 
2678 	/* direct register portion */
2679 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2680 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2681 			register_list_format[i]);
2682 
2683 	/* indirect register portion */
2684 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2685 		if (register_list_format[i] == 0xFFFFFFFF) {
2686 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2687 			continue;
2688 		}
2689 
2690 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2691 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2692 
2693 		for (j = 0; j < unique_indirect_reg_count; j++) {
2694 			if (register_list_format[i] == unique_indirect_regs[j]) {
2695 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2696 				break;
2697 			}
2698 		}
2699 
2700 		BUG_ON(j >= unique_indirect_reg_count);
2701 
2702 		i++;
2703 	}
2704 
2705 	/* set save/restore list size */
2706 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2707 	list_size = list_size >> 1;
2708 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2709 		adev->gfx.rlc.reg_restore_list_size);
2710 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2711 
2712 	/* write the starting offsets to RLC scratch ram */
2713 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2714 		adev->gfx.rlc.starting_offsets_start);
2715 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2716 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2717 		       indirect_start_offsets[i]);
2718 
2719 	/* load unique indirect regs*/
2720 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2721 		if (unique_indirect_regs[i] != 0) {
2722 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2723 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2724 			       unique_indirect_regs[i] & 0x3FFFF);
2725 
2726 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2727 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2728 			       unique_indirect_regs[i] >> 20);
2729 		}
2730 	}
2731 
2732 	kfree(register_list_format);
2733 	return 0;
2734 }
2735 
2736 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2737 {
2738 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2739 }
2740 
2741 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2742 					     bool enable)
2743 {
2744 	uint32_t data = 0;
2745 	uint32_t default_data = 0;
2746 
2747 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2748 	if (enable) {
2749 		/* enable GFXIP control over CGPG */
2750 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2751 		if (default_data != data)
2752 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2753 
2754 		/* update status */
2755 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2756 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2757 		if (default_data != data)
2758 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2759 	} else {
2760 		/* restore GFXIP control over CGPG */
2761 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2762 		if (default_data != data)
2763 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2764 	}
2765 }
2766 
2767 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2768 {
2769 	uint32_t data = 0;
2770 
2771 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2772 			      AMD_PG_SUPPORT_GFX_SMG |
2773 			      AMD_PG_SUPPORT_GFX_DMG)) {
2774 		/* init IDLE_POLL_COUNT = 60 */
2775 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2776 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2777 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2778 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2779 
2780 		/* init RLC PG Delay */
2781 		data = 0;
2782 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2783 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2784 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2785 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2786 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2787 
2788 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2789 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2790 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2791 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2792 
2793 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2794 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2795 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2796 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2797 
2798 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2799 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2800 
2801 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2802 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2803 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2804 
2805 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2806 	}
2807 }
2808 
2809 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2810 						bool enable)
2811 {
2812 	uint32_t data = 0;
2813 	uint32_t default_data = 0;
2814 
2815 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2816 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2817 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2818 			     enable ? 1 : 0);
2819 	if (default_data != data)
2820 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2821 }
2822 
2823 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2824 						bool enable)
2825 {
2826 	uint32_t data = 0;
2827 	uint32_t default_data = 0;
2828 
2829 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2830 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2831 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2832 			     enable ? 1 : 0);
2833 	if (default_data != data)
2834 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2835 }
2836 
2837 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2838 					bool enable)
2839 {
2840 	uint32_t data = 0;
2841 	uint32_t default_data = 0;
2842 
2843 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2844 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2845 			     CP_PG_DISABLE,
2846 			     enable ? 0 : 1);
2847 	if (default_data != data)
2848 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2849 }
2850 
2851 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2852 						bool enable)
2853 {
2854 	uint32_t data, default_data;
2855 
2856 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2857 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2858 			     GFX_POWER_GATING_ENABLE,
2859 			     enable ? 1 : 0);
2860 	if (default_data != data)
2861 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2862 }
2863 
2864 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2865 						bool enable)
2866 {
2867 	uint32_t data, default_data;
2868 
2869 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2870 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2871 			     GFX_PIPELINE_PG_ENABLE,
2872 			     enable ? 1 : 0);
2873 	if (default_data != data)
2874 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2875 
2876 	if (!enable)
2877 		/* read any GFX register to wake up GFX */
2878 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2879 }
2880 
2881 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2882 						       bool enable)
2883 {
2884 	uint32_t data, default_data;
2885 
2886 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2887 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2888 			     STATIC_PER_CU_PG_ENABLE,
2889 			     enable ? 1 : 0);
2890 	if (default_data != data)
2891 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2892 }
2893 
2894 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2895 						bool enable)
2896 {
2897 	uint32_t data, default_data;
2898 
2899 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2900 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2901 			     DYN_PER_CU_PG_ENABLE,
2902 			     enable ? 1 : 0);
2903 	if (default_data != data)
2904 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2905 }
2906 
2907 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2908 {
2909 	gfx_v9_0_init_csb(adev);
2910 
2911 	/*
2912 	 * The RLC save/restore list is only usable since RLC v2_1,
2913 	 * and it is required by the gfxoff feature.
2914 	 */
2915 	if (adev->gfx.rlc.is_rlc_v2_1) {
2916 		gfx_v9_1_init_rlc_save_restore_list(adev);
2917 		gfx_v9_0_enable_save_restore_machine(adev);
2918 	}
2919 
2920 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2921 			      AMD_PG_SUPPORT_GFX_SMG |
2922 			      AMD_PG_SUPPORT_GFX_DMG |
2923 			      AMD_PG_SUPPORT_CP |
2924 			      AMD_PG_SUPPORT_GDS |
2925 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2926 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2927 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2928 		gfx_v9_0_init_gfx_power_gating(adev);
2929 	}
2930 }
2931 
2932 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2933 {
2934 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2935 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2936 	gfx_v9_0_wait_for_rlc_serdes(adev);
2937 }
2938 
2939 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2940 {
2941 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2942 	udelay(50);
2943 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2944 	udelay(50);
2945 }
2946 
2947 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2948 {
2949 #ifdef AMDGPU_RLC_DEBUG_RETRY
2950 	u32 rlc_ucode_ver;
2951 #endif
2952 
2953 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2954 	udelay(50);
2955 
2956 	/* APUs (e.g. carrizo) enable the cp interrupt only after the cp is initialized */
2957 	if (!(adev->flags & AMD_IS_APU)) {
2958 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2959 		udelay(50);
2960 	}
2961 
2962 #ifdef AMDGPU_RLC_DEBUG_RETRY
2963 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2964 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2965 	if (rlc_ucode_ver == 0x108) {
2966 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2967 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2968 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2969 		 * default is 0x9C4 to create a 100us interval */
2970 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2971 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2972 		 * to disable the page fault retry interrupts, default is
2973 		 * 0x100 (256) */
2974 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2975 	}
2976 #endif
2977 }
2978 
2979 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2980 {
2981 	const struct rlc_firmware_header_v2_0 *hdr;
2982 	const __le32 *fw_data;
2983 	unsigned i, fw_size;
2984 
2985 	if (!adev->gfx.rlc_fw)
2986 		return -EINVAL;
2987 
2988 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2989 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2990 
2991 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2992 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2993 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2994 
2995 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2996 			RLCG_UCODE_LOADING_START_ADDRESS);
2997 	for (i = 0; i < fw_size; i++)
2998 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2999 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3000 
3001 	return 0;
3002 }
3003 
3004 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3005 {
3006 	int r;
3007 
3008 	if (amdgpu_sriov_vf(adev)) {
3009 		gfx_v9_0_init_csb(adev);
3010 		return 0;
3011 	}
3012 
3013 	adev->gfx.rlc.funcs->stop(adev);
3014 
3015 	/* disable CG */
3016 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3017 
3018 	gfx_v9_0_init_pg(adev);
3019 
3020 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3021 		/* legacy rlc firmware loading */
3022 		r = gfx_v9_0_rlc_load_microcode(adev);
3023 		if (r)
3024 			return r;
3025 	}
3026 
3027 	switch (adev->asic_type) {
3028 	case CHIP_RAVEN:
3029 	case CHIP_RENOIR:
3030 		if (amdgpu_lbpw == 0)
3031 			gfx_v9_0_enable_lbpw(adev, false);
3032 		else
3033 			gfx_v9_0_enable_lbpw(adev, true);
3034 		break;
3035 	case CHIP_VEGA20:
3036 		if (amdgpu_lbpw > 0)
3037 			gfx_v9_0_enable_lbpw(adev, true);
3038 		else
3039 			gfx_v9_0_enable_lbpw(adev, false);
3040 		break;
3041 	default:
3042 		break;
3043 	}
3044 
3045 	adev->gfx.rlc.funcs->start(adev);
3046 
3047 	return 0;
3048 }
3049 
3050 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3051 {
3052 	int i;
3053 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3054 
3055 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3056 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3057 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3058 	if (!enable) {
3059 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3060 			adev->gfx.gfx_ring[i].sched.ready = false;
3061 	}
3062 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3063 	udelay(50);
3064 }
3065 
3066 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3067 {
3068 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3069 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3070 	const struct gfx_firmware_header_v1_0 *me_hdr;
3071 	const __le32 *fw_data;
3072 	unsigned i, fw_size;
3073 
3074 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3075 		return -EINVAL;
3076 
3077 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3078 		adev->gfx.pfp_fw->data;
3079 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3080 		adev->gfx.ce_fw->data;
3081 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3082 		adev->gfx.me_fw->data;
3083 
3084 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3085 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3086 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3087 
3088 	gfx_v9_0_cp_gfx_enable(adev, false);
3089 
3090 	/* PFP */
3091 	fw_data = (const __le32 *)
3092 		(adev->gfx.pfp_fw->data +
3093 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3094 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3095 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3096 	for (i = 0; i < fw_size; i++)
3097 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3098 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3099 
3100 	/* CE */
3101 	fw_data = (const __le32 *)
3102 		(adev->gfx.ce_fw->data +
3103 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3104 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3105 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3106 	for (i = 0; i < fw_size; i++)
3107 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3108 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3109 
3110 	/* ME */
3111 	fw_data = (const __le32 *)
3112 		(adev->gfx.me_fw->data +
3113 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3114 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3115 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3116 	for (i = 0; i < fw_size; i++)
3117 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3118 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3119 
3120 	return 0;
3121 }
3122 
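/* Prime the gfx CP: emit the clear-state preamble, context control, the
 * gfx9 clear-state context registers and the CE partition base on the gfx
 * ring so later submissions start from a known context.
 */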
3123 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3124 {
3125 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3126 	const struct cs_section_def *sect = NULL;
3127 	const struct cs_extent_def *ext = NULL;
3128 	int r, i, tmp;
3129 
3130 	/* init the CP */
3131 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3132 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3133 
3134 	gfx_v9_0_cp_gfx_enable(adev, true);
3135 
3136 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3137 	if (r) {
3138 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3139 		return r;
3140 	}
3141 
3142 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3143 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3144 
3145 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3146 	amdgpu_ring_write(ring, 0x80000000);
3147 	amdgpu_ring_write(ring, 0x80000000);
3148 
3149 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3150 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3151 			if (sect->id == SECT_CONTEXT) {
3152 				amdgpu_ring_write(ring,
3153 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3154 					       ext->reg_count));
3155 				amdgpu_ring_write(ring,
3156 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3157 				for (i = 0; i < ext->reg_count; i++)
3158 					amdgpu_ring_write(ring, ext->extent[i]);
3159 			}
3160 		}
3161 	}
3162 
3163 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3164 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3165 
3166 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3167 	amdgpu_ring_write(ring, 0);
3168 
3169 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3170 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3171 	amdgpu_ring_write(ring, 0x8000);
3172 	amdgpu_ring_write(ring, 0x8000);
3173 
3174 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3175 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3176 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3177 	amdgpu_ring_write(ring, tmp);
3178 	amdgpu_ring_write(ring, 0);
3179 
3180 	amdgpu_ring_commit(ring);
3181 
3182 	return 0;
3183 }
3184 
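/* Program the gfx ring buffer: size, rptr/wptr write-back addresses, base
 * address and doorbell range, then start the ring via gfx_v9_0_cp_gfx_start().
 */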
3185 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3186 {
3187 	struct amdgpu_ring *ring;
3188 	u32 tmp;
3189 	u32 rb_bufsz;
3190 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3191 
3192 	/* Set the write pointer delay */
3193 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3194 
3195 	/* set the RB to use vmid 0 */
3196 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3197 
3198 	/* Set ring buffer size */
3199 	ring = &adev->gfx.gfx_ring[0];
3200 	rb_bufsz = order_base_2(ring->ring_size / 8);
3201 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3202 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3203 #ifdef __BIG_ENDIAN
3204 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3205 #endif
3206 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3207 
3208 	/* Initialize the ring buffer's write pointers */
3209 	ring->wptr = 0;
3210 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3211 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3212 
3213 	/* set the wb address whether it's enabled or not */
3214 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3215 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3216 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3217 
3218 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3219 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3220 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3221 
3222 	mdelay(1);
3223 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3224 
3225 	rb_addr = ring->gpu_addr >> 8;
3226 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3227 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3228 
3229 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3230 	if (ring->use_doorbell) {
3231 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3232 				    DOORBELL_OFFSET, ring->doorbell_index);
3233 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3234 				    DOORBELL_EN, 1);
3235 	} else {
3236 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3237 	}
3238 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3239 
3240 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3241 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3242 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3243 
3244 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3245 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3246 
3247 
3248 	/* start the ring */
3249 	gfx_v9_0_cp_gfx_start(adev);
3250 	ring->sched.ready = true;
3251 
3252 	return 0;
3253 }
3254 
3255 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3256 {
3257 	int i;
3258 
3259 	if (enable) {
3260 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3261 	} else {
3262 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3263 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3264 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3265 			adev->gfx.compute_ring[i].sched.ready = false;
3266 		adev->gfx.kiq.ring.sched.ready = false;
3267 	}
3268 	udelay(50);
3269 }
3270 
3271 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3272 {
3273 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3274 	const __le32 *fw_data;
3275 	unsigned i;
3276 	u32 tmp;
3277 
3278 	if (!adev->gfx.mec_fw)
3279 		return -EINVAL;
3280 
3281 	gfx_v9_0_cp_compute_enable(adev, false);
3282 
3283 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3284 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3285 
3286 	fw_data = (const __le32 *)
3287 		(adev->gfx.mec_fw->data +
3288 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3289 	tmp = 0;
3290 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3291 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3292 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3293 
3294 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3295 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3296 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3297 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3298 
3299 	/* MEC1 */
3300 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3301 			 mec_hdr->jt_offset);
3302 	for (i = 0; i < mec_hdr->jt_size; i++)
3303 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3304 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3305 
3306 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3307 			adev->gfx.mec_fw_version);
3308 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3309 
3310 	return 0;
3311 }
3312 
3313 /* KIQ functions */
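/* The KIQ (kernel interface queue) is a compute queue the driver owns and
 * uses to map/unmap the other compute queues via MAP_QUEUES/UNMAP_QUEUES
 * packets.
 */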
3314 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3315 {
3316 	uint32_t tmp;
3317 	struct amdgpu_device *adev = ring->adev;
3318 
3319 	/* tell RLC which queue is the KIQ */
3320 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3321 	tmp &= 0xffffff00;
3322 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3323 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3324 	tmp |= 0x80;
3325 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3326 }
3327 
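/* Hand all kernel compute queues (KCQs) to the CP through the KIQ: a
 * SET_RESOURCES packet publishes the queue mask, then one MAP_QUEUES packet
 * per compute ring points the CP at that ring's MQD and wptr.
 */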
3328 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3329 {
3330 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3331 	uint64_t queue_mask = 0;
3332 	int r, i;
3333 
3334 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3335 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3336 			continue;
3337 
3338 		/* This situation may be hit in the future if a new HW
3339 		 * generation exposes more than 64 queues. If so, the
3340 		 * definition of queue_mask needs updating */
3341 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3342 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3343 			break;
3344 		}
3345 
3346 		queue_mask |= (1ull << i);
3347 	}
3348 
3349 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3350 	if (r) {
3351 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3352 		return r;
3353 	}
3354 
3355 	/* set resources */
3356 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3357 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3358 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3359 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3360 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3361 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3362 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3363 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3364 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3365 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3366 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3367 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3368 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3369 
3370 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3371 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3372 		amdgpu_ring_write(kiq_ring,
3373 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3374 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3375 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3376 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3377 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3378 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3379 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3380 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3381 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3382 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3383 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3384 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3385 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3386 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3387 	}
3388 
3389 	r = amdgpu_ring_test_helper(kiq_ring);
3390 	if (r)
3391 		DRM_ERROR("KCQ enable failed\n");
3392 
3393 	return r;
3394 }
3395 
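/* Fill in the MQD (memory queue descriptor) for a compute ring; the CP
 * loads the HQD registers from this structure when the queue is mapped.
 */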
3396 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3397 {
3398 	struct amdgpu_device *adev = ring->adev;
3399 	struct v9_mqd *mqd = ring->mqd_ptr;
3400 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3401 	uint32_t tmp;
3402 
3403 	mqd->header = 0xC0310800;
3404 	mqd->compute_pipelinestat_enable = 0x00000001;
3405 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3406 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3407 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3408 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3409 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3410 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3411 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3412 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3413 	mqd->compute_misc_reserved = 0x00000003;
3414 
3415 	mqd->dynamic_cu_mask_addr_lo =
3416 		lower_32_bits(ring->mqd_gpu_addr
3417 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3418 	mqd->dynamic_cu_mask_addr_hi =
3419 		upper_32_bits(ring->mqd_gpu_addr
3420 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3421 
3422 	eop_base_addr = ring->eop_gpu_addr >> 8;
3423 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3424 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3425 
3426 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3427 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3428 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3429 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3430 
3431 	mqd->cp_hqd_eop_control = tmp;
3432 
3433 	/* enable doorbell? */
3434 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3435 
3436 	if (ring->use_doorbell) {
3437 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438 				    DOORBELL_OFFSET, ring->doorbell_index);
3439 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3440 				    DOORBELL_EN, 1);
3441 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3442 				    DOORBELL_SOURCE, 0);
3443 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3444 				    DOORBELL_HIT, 0);
3445 	} else {
3446 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447 					 DOORBELL_EN, 0);
3448 	}
3449 
3450 	mqd->cp_hqd_pq_doorbell_control = tmp;
3451 
3452 	/* disable the queue if it's active */
3453 	ring->wptr = 0;
3454 	mqd->cp_hqd_dequeue_request = 0;
3455 	mqd->cp_hqd_pq_rptr = 0;
3456 	mqd->cp_hqd_pq_wptr_lo = 0;
3457 	mqd->cp_hqd_pq_wptr_hi = 0;
3458 
3459 	/* set the pointer to the MQD */
3460 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3461 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3462 
3463 	/* set MQD vmid to 0 */
3464 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3465 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3466 	mqd->cp_mqd_control = tmp;
3467 
3468 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3469 	hqd_gpu_addr = ring->gpu_addr >> 8;
3470 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3471 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3472 
3473 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3474 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3475 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3476 			    (order_base_2(ring->ring_size / 4) - 1));
3477 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3478 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3479 #ifdef __BIG_ENDIAN
3480 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3481 #endif
3482 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3483 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3484 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3485 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3486 	mqd->cp_hqd_pq_control = tmp;
3487 
3488 	/* set the wb address whether it's enabled or not */
3489 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3490 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3491 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3492 		upper_32_bits(wb_gpu_addr) & 0xffff;
3493 
3494 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3495 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3496 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3497 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3498 
3499 	tmp = 0;
3500 	/* enable the doorbell if requested */
3501 	if (ring->use_doorbell) {
3502 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3503 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3504 				DOORBELL_OFFSET, ring->doorbell_index);
3505 
3506 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507 					 DOORBELL_EN, 1);
3508 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509 					 DOORBELL_SOURCE, 0);
3510 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3511 					 DOORBELL_HIT, 0);
3512 	}
3513 
3514 	mqd->cp_hqd_pq_doorbell_control = tmp;
3515 
3516 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3517 	ring->wptr = 0;
3518 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3519 
3520 	/* set the vmid for the queue */
3521 	mqd->cp_hqd_vmid = 0;
3522 
3523 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3524 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3525 	mqd->cp_hqd_persistent_state = tmp;
3526 
3527 	/* set MIN_IB_AVAIL_SIZE */
3528 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3529 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3530 	mqd->cp_hqd_ib_control = tmp;
3531 
3532 	/* activate the queue */
3533 	mqd->cp_hqd_active = 1;
3534 
3535 	return 0;
3536 }
3537 
3538 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3539 {
3540 	struct amdgpu_device *adev = ring->adev;
3541 	struct v9_mqd *mqd = ring->mqd_ptr;
3542 	int j;
3543 
3544 	/* disable wptr polling */
3545 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3546 
3547 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3548 	       mqd->cp_hqd_eop_base_addr_lo);
3549 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3550 	       mqd->cp_hqd_eop_base_addr_hi);
3551 
3552 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3553 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3554 	       mqd->cp_hqd_eop_control);
3555 
3556 	/* enable doorbell? */
3557 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3558 	       mqd->cp_hqd_pq_doorbell_control);
3559 
3560 	/* disable the queue if it's active */
3561 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3562 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3563 		for (j = 0; j < adev->usec_timeout; j++) {
3564 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3565 				break;
3566 			udelay(1);
3567 		}
3568 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3569 		       mqd->cp_hqd_dequeue_request);
3570 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3571 		       mqd->cp_hqd_pq_rptr);
3572 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3573 		       mqd->cp_hqd_pq_wptr_lo);
3574 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3575 		       mqd->cp_hqd_pq_wptr_hi);
3576 	}
3577 
3578 	/* set the pointer to the MQD */
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3580 	       mqd->cp_mqd_base_addr_lo);
3581 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3582 	       mqd->cp_mqd_base_addr_hi);
3583 
3584 	/* set MQD vmid to 0 */
3585 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3586 	       mqd->cp_mqd_control);
3587 
3588 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3589 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3590 	       mqd->cp_hqd_pq_base_lo);
3591 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3592 	       mqd->cp_hqd_pq_base_hi);
3593 
3594 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3595 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3596 	       mqd->cp_hqd_pq_control);
3597 
3598 	/* set the wb address whether it's enabled or not */
3599 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3600 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3601 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3602 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3603 
3604 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3605 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3606 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3607 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3608 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3609 
3610 	/* enable the doorbell if requested */
3611 	if (ring->use_doorbell) {
3612 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3613 					(adev->doorbell_index.kiq * 2) << 2);
3614 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3615 					(adev->doorbell_index.userqueue_end * 2) << 2);
3616 	}
3617 
3618 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3619 	       mqd->cp_hqd_pq_doorbell_control);
3620 
3621 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3623 	       mqd->cp_hqd_pq_wptr_lo);
3624 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3625 	       mqd->cp_hqd_pq_wptr_hi);
3626 
3627 	/* set the vmid for the queue */
3628 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3629 
3630 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3631 	       mqd->cp_hqd_persistent_state);
3632 
3633 	/* activate the queue */
3634 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3635 	       mqd->cp_hqd_active);
3636 
3637 	if (ring->use_doorbell)
3638 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3639 
3640 	return 0;
3641 }
3642 
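/* Tear down the KIQ's HQD: request a dequeue, wait for the queue to go
 * inactive (disabling it manually on timeout) and clear the HQD registers.
 */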
3643 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3644 {
3645 	struct amdgpu_device *adev = ring->adev;
3646 	int j;
3647 
3648 	/* disable the queue if it's active */
3649 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3650 
3651 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3652 
3653 		for (j = 0; j < adev->usec_timeout; j++) {
3654 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3655 				break;
3656 			udelay(1);
3657 		}
3658 
3659 		if (j == adev->usec_timeout) {
3660 			DRM_DEBUG("KIQ dequeue request failed.\n");
3661 
3662 			/* manually disable the queue if the dequeue request timed out */
3663 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3664 		}
3665 
3666 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3667 		      0);
3668 	}
3669 
3670 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3672 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3674 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3675 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3676 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3677 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3678 
3679 	return 0;
3680 }
3681 
3682 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3683 {
3684 	struct amdgpu_device *adev = ring->adev;
3685 	struct v9_mqd *mqd = ring->mqd_ptr;
3686 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3687 
3688 	gfx_v9_0_kiq_setting(ring);
3689 
3690 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3691 		/* reset MQD to a clean status */
3692 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3693 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3694 
3695 		/* reset ring buffer */
3696 		ring->wptr = 0;
3697 		amdgpu_ring_clear_ring(ring);
3698 
3699 		mutex_lock(&adev->srbm_mutex);
3700 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3701 		gfx_v9_0_kiq_init_register(ring);
3702 		soc15_grbm_select(adev, 0, 0, 0, 0);
3703 		mutex_unlock(&adev->srbm_mutex);
3704 	} else {
3705 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3706 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3707 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3708 		mutex_lock(&adev->srbm_mutex);
3709 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3710 		gfx_v9_0_mqd_init(ring);
3711 		gfx_v9_0_kiq_init_register(ring);
3712 		soc15_grbm_select(adev, 0, 0, 0, 0);
3713 		mutex_unlock(&adev->srbm_mutex);
3714 
3715 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3716 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3717 	}
3718 
3719 	return 0;
3720 }
3721 
3722 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3723 {
3724 	struct amdgpu_device *adev = ring->adev;
3725 	struct v9_mqd *mqd = ring->mqd_ptr;
3726 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3727 
3728 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3729 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3730 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3731 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3732 		mutex_lock(&adev->srbm_mutex);
3733 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3734 		gfx_v9_0_mqd_init(ring);
3735 		soc15_grbm_select(adev, 0, 0, 0, 0);
3736 		mutex_unlock(&adev->srbm_mutex);
3737 
3738 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3739 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3740 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3741 		/* reset MQD to a clean status */
3742 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3743 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3744 
3745 		/* reset ring buffer */
3746 		ring->wptr = 0;
3747 		amdgpu_ring_clear_ring(ring);
3748 	} else {
3749 		amdgpu_ring_clear_ring(ring);
3750 	}
3751 
3752 	return 0;
3753 }
3754 
3755 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3756 {
3757 	struct amdgpu_ring *ring;
3758 	int r;
3759 
3760 	ring = &adev->gfx.kiq.ring;
3761 
3762 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3763 	if (unlikely(r != 0))
3764 		return r;
3765 
3766 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3767 	if (unlikely(r != 0))
3768 		return r;
3769 
3770 	gfx_v9_0_kiq_init_queue(ring);
3771 	amdgpu_bo_kunmap(ring->mqd_obj);
3772 	ring->mqd_ptr = NULL;
3773 	amdgpu_bo_unreserve(ring->mqd_obj);
3774 	ring->sched.ready = true;
3775 	return 0;
3776 }
3777 
3778 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3779 {
3780 	struct amdgpu_ring *ring = NULL;
3781 	int r = 0, i;
3782 
3783 	gfx_v9_0_cp_compute_enable(adev, true);
3784 
3785 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3786 		ring = &adev->gfx.compute_ring[i];
3787 
3788 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3789 		if (unlikely(r != 0))
3790 			goto done;
3791 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3792 		if (!r) {
3793 			r = gfx_v9_0_kcq_init_queue(ring);
3794 			amdgpu_bo_kunmap(ring->mqd_obj);
3795 			ring->mqd_ptr = NULL;
3796 		}
3797 		amdgpu_bo_unreserve(ring->mqd_obj);
3798 		if (r)
3799 			goto done;
3800 	}
3801 
3802 	r = gfx_v9_0_kiq_kcq_enable(adev);
3803 done:
3804 	return r;
3805 }
3806 
3807 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3808 {
3809 	int r, i;
3810 	struct amdgpu_ring *ring;
3811 
3812 	if (!(adev->flags & AMD_IS_APU))
3813 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3814 
3815 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3816 		if (adev->asic_type != CHIP_ARCTURUS) {
3817 			/* legacy firmware loading */
3818 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3819 			if (r)
3820 				return r;
3821 		}
3822 
3823 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3824 		if (r)
3825 			return r;
3826 	}
3827 
3828 	r = gfx_v9_0_kiq_resume(adev);
3829 	if (r)
3830 		return r;
3831 
3832 	if (adev->asic_type != CHIP_ARCTURUS) {
3833 		r = gfx_v9_0_cp_gfx_resume(adev);
3834 		if (r)
3835 			return r;
3836 	}
3837 
3838 	r = gfx_v9_0_kcq_resume(adev);
3839 	if (r)
3840 		return r;
3841 
3842 	if (adev->asic_type != CHIP_ARCTURUS) {
3843 		ring = &adev->gfx.gfx_ring[0];
3844 		r = amdgpu_ring_test_helper(ring);
3845 		if (r)
3846 			return r;
3847 	}
3848 
3849 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3850 		ring = &adev->gfx.compute_ring[i];
3851 		amdgpu_ring_test_helper(ring);
3852 	}
3853 
3854 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3855 
3856 	return 0;
3857 }
3858 
3859 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3860 {
3861 	if (adev->asic_type != CHIP_ARCTURUS)
3862 		gfx_v9_0_cp_gfx_enable(adev, enable);
3863 	gfx_v9_0_cp_compute_enable(adev, enable);
3864 }
3865 
3866 static int gfx_v9_0_hw_init(void *handle)
3867 {
3868 	int r;
3869 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3870 
3871 	if (!amdgpu_sriov_vf(adev))
3872 		gfx_v9_0_init_golden_registers(adev);
3873 
3874 	gfx_v9_0_constants_init(adev);
3875 
3876 	r = gfx_v9_0_csb_vram_pin(adev);
3877 	if (r)
3878 		return r;
3879 
3880 	r = adev->gfx.rlc.funcs->resume(adev);
3881 	if (r)
3882 		return r;
3883 
3884 	r = gfx_v9_0_cp_resume(adev);
3885 	if (r)
3886 		return r;
3887 
3888 	if (adev->asic_type != CHIP_ARCTURUS) {
3889 		r = gfx_v9_0_ngg_en(adev);
3890 		if (r)
3891 			return r;
3892 	}
3893 
3894 	return r;
3895 }
3896 
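/* Unmap all kernel compute queues through the KIQ, one UNMAP_QUEUES
 * (RESET_QUEUES action) packet per compute ring.
 */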
3897 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3898 {
3899 	int r, i;
3900 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3901 
3902 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3903 	if (r)
3904 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3905 
3906 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3907 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3908 
3909 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3910 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3911 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3912 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3913 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3914 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3915 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3916 		amdgpu_ring_write(kiq_ring, 0);
3917 		amdgpu_ring_write(kiq_ring, 0);
3918 		amdgpu_ring_write(kiq_ring, 0);
3919 	}
3920 	r = amdgpu_ring_test_helper(kiq_ring);
3921 	if (r)
3922 		DRM_ERROR("KCQ disable failed\n");
3923 
3924 	return r;
3925 }
3926 
3927 static int gfx_v9_0_hw_fini(void *handle)
3928 {
3929 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3930 
3931 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3932 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3933 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3934 
3935 	/* disable KCQ to avoid the CPC touching memory that is no longer valid */
3936 	gfx_v9_0_kcq_disable(adev);
3937 
3938 	if (amdgpu_sriov_vf(adev)) {
3939 		gfx_v9_0_cp_gfx_enable(adev, false);
3940 		/* must disable polling for SRIOV when hw is finished, otherwise
3941 		 * the CPC engine may keep fetching the WB address, which is already
3942 		 * invalid after sw fini, and trigger a DMAR read error on the
3943 		 * hypervisor side.
3944 		 */
3945 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3946 		return 0;
3947 	}
3948 
3949 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3950 	 * otherwise the KIQ hangs when binding back
3951 	 */
3952 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3953 		mutex_lock(&adev->srbm_mutex);
3954 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3955 				adev->gfx.kiq.ring.pipe,
3956 				adev->gfx.kiq.ring.queue, 0);
3957 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3958 		soc15_grbm_select(adev, 0, 0, 0, 0);
3959 		mutex_unlock(&adev->srbm_mutex);
3960 	}
3961 
3962 	gfx_v9_0_cp_enable(adev, false);
3963 	adev->gfx.rlc.funcs->stop(adev);
3964 
3965 	gfx_v9_0_csb_vram_unpin(adev);
3966 
3967 	return 0;
3968 }
3969 
3970 static int gfx_v9_0_suspend(void *handle)
3971 {
3972 	return gfx_v9_0_hw_fini(handle);
3973 }
3974 
3975 static int gfx_v9_0_resume(void *handle)
3976 {
3977 	return gfx_v9_0_hw_init(handle);
3978 }
3979 
3980 static bool gfx_v9_0_is_idle(void *handle)
3981 {
3982 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3983 
3984 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3985 				GRBM_STATUS, GUI_ACTIVE))
3986 		return false;
3987 	else
3988 		return true;
3989 }
3990 
3991 static int gfx_v9_0_wait_for_idle(void *handle)
3992 {
3993 	unsigned i;
3994 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3995 
3996 	for (i = 0; i < adev->usec_timeout; i++) {
3997 		if (gfx_v9_0_is_idle(handle))
3998 			return 0;
3999 		udelay(1);
4000 	}
4001 	return -ETIMEDOUT;
4002 }
4003 
4004 static int gfx_v9_0_soft_reset(void *handle)
4005 {
4006 	u32 grbm_soft_reset = 0;
4007 	u32 tmp;
4008 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4009 
4010 	/* GRBM_STATUS */
4011 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4012 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4013 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4014 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4015 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4016 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4017 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4018 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4019 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4020 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4021 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4022 	}
4023 
4024 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4025 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4026 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4027 	}
4028 
4029 	/* GRBM_STATUS2 */
4030 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4031 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4032 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4033 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4034 
4035 
4036 	if (grbm_soft_reset) {
4037 		/* stop the rlc */
4038 		adev->gfx.rlc.funcs->stop(adev);
4039 
4040 		if (adev->asic_type != CHIP_ARCTURUS)
4041 			/* Disable GFX parsing/prefetching */
4042 			gfx_v9_0_cp_gfx_enable(adev, false);
4043 
4044 		/* Disable MEC parsing/prefetching */
4045 		gfx_v9_0_cp_compute_enable(adev, false);
4046 
4047 		if (grbm_soft_reset) {
4048 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4049 			tmp |= grbm_soft_reset;
4050 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4051 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4052 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4053 
4054 			udelay(50);
4055 
4056 			tmp &= ~grbm_soft_reset;
4057 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4058 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4059 		}
4060 
4061 		/* Wait a little for things to settle down */
4062 		udelay(50);
4063 	}
4064 	return 0;
4065 }
4066 
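/* Latch the 64-bit GPU clock counter via RLC_CAPTURE_GPU_CLOCK_COUNT and
 * read it back as an LSB/MSB pair.
 */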
4067 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4068 {
4069 	uint64_t clock;
4070 
4071 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4072 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4073 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4074 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4075 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4076 	return clock;
4077 }
4078 
4079 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4080 					  uint32_t vmid,
4081 					  uint32_t gds_base, uint32_t gds_size,
4082 					  uint32_t gws_base, uint32_t gws_size,
4083 					  uint32_t oa_base, uint32_t oa_size)
4084 {
4085 	struct amdgpu_device *adev = ring->adev;
4086 
4087 	/* GDS Base */
4088 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4089 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4090 				   gds_base);
4091 
4092 	/* GDS Size */
4093 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4094 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4095 				   gds_size);
4096 
4097 	/* GWS */
4098 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4099 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4100 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4101 
4102 	/* OA */
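	/* (1 << (oa_size + oa_base)) - (1 << oa_base) is a mask of oa_size
	 * consecutive OA bits starting at bit oa_base.
	 */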
4103 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4104 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4105 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4106 }
4107 
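/* Hand-assembled gfx9 compute shaders used by the EDC workarounds below:
 * the first initializes VGPRs, the second SGPRs.
 */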
4108 static const u32 vgpr_init_compute_shader[] =
4109 {
4110 	0xb07c0000, 0xbe8000ff,
4111 	0x000000f8, 0xbf110800,
4112 	0x7e000280, 0x7e020280,
4113 	0x7e040280, 0x7e060280,
4114 	0x7e080280, 0x7e0a0280,
4115 	0x7e0c0280, 0x7e0e0280,
4116 	0x80808800, 0xbe803200,
4117 	0xbf84fff5, 0xbf9c0000,
4118 	0xd28c0001, 0x0001007f,
4119 	0xd28d0001, 0x0002027e,
4120 	0x10020288, 0xb8810904,
4121 	0xb7814000, 0xd1196a01,
4122 	0x00000301, 0xbe800087,
4123 	0xbefc00c1, 0xd89c4000,
4124 	0x00020201, 0xd89cc080,
4125 	0x00040401, 0x320202ff,
4126 	0x00000800, 0x80808100,
4127 	0xbf84fff8, 0x7e020280,
4128 	0xbf810000, 0x00000000,
4129 };
4130 
4131 static const u32 sgpr_init_compute_shader[] =
4132 {
4133 	0xb07c0000, 0xbe8000ff,
4134 	0x0000005f, 0xbee50080,
4135 	0xbe812c65, 0xbe822c65,
4136 	0xbe832c65, 0xbe842c65,
4137 	0xbe852c65, 0xb77c0005,
4138 	0x80808500, 0xbf84fff8,
4139 	0xbe800080, 0xbf810000,
4140 };
4141 
4142 static const struct soc15_reg_entry vgpr_init_regs[] = {
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4151    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4152    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4153 };
4154 
4155 static const struct soc15_reg_entry sgpr_init_regs[] = {
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4164    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4165    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4166 };
4167 
4168 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4169    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4173    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4174    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4175    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4176    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4177    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4178    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4179    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4180    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4181    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4183    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4185    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4186    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4187    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4188    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4189    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4191    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4192    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4193    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4194    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4195    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4196    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4198    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4199    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4200    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4201 };
4202 
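/* EDC workaround: issue a DMA_DATA packet that fills the entire GDS so its
 * ECC-protected contents start out initialized.
 */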
4203 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4204 {
4205 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4206 	int i, r;
4207 
4208 	r = amdgpu_ring_alloc(ring, 7);
4209 	if (r) {
4210 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4211 			ring->name, r);
4212 		return r;
4213 	}
4214 
4215 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4216 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4217 
4218 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4219 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4220 				PACKET3_DMA_DATA_DST_SEL(1) |
4221 				PACKET3_DMA_DATA_SRC_SEL(2) |
4222 				PACKET3_DMA_DATA_ENGINE(0)));
4223 	amdgpu_ring_write(ring, 0);
4224 	amdgpu_ring_write(ring, 0);
4225 	amdgpu_ring_write(ring, 0);
4226 	amdgpu_ring_write(ring, 0);
4227 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4228 				adev->gds.gds_size);
4229 
4230 	amdgpu_ring_commit(ring);
4231 
4232 	for (i = 0; i < adev->usec_timeout; i++) {
4233 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4234 			break;
4235 		udelay(1);
4236 	}
4237 
4238 	if (i >= adev->usec_timeout)
4239 		r = -ETIMEDOUT;
4240 
4241 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4242 
4243 	return r;
4244 }
4245 
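/* EDC workaround (RAS only): dispatch the VGPR and SGPR init shaders to
 * touch every GPR, then read back the SEC/DED counter registers to clear
 * them.
 */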
4246 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4247 {
4248 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4249 	struct amdgpu_ib ib;
4250 	struct dma_fence *f = NULL;
4251 	int r, i, j, k;
4252 	unsigned total_size, vgpr_offset, sgpr_offset;
4253 	u64 gpu_addr;
4254 
4255 	/* only supported when RAS is enabled */
4256 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4257 		return 0;
4258 
4259 	/* bail if the compute ring is not ready */
4260 	if (!ring->sched.ready)
4261 		return 0;
4262 
4263 	total_size =
4264 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4265 	total_size +=
4266 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4267 	total_size = ALIGN(total_size, 256);
4268 	vgpr_offset = total_size;
4269 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4270 	sgpr_offset = total_size;
4271 	total_size += sizeof(sgpr_init_compute_shader);
4272 
4273 	/* allocate an indirect buffer to put the commands in */
4274 	memset(&ib, 0, sizeof(ib));
4275 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4276 	if (r) {
4277 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4278 		return r;
4279 	}
4280 
4281 	/* load the compute shaders */
4282 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4283 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4284 
4285 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4286 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4287 
4288 	/* init the ib length to 0 */
4289 	ib.length_dw = 0;
4290 
4291 	/* VGPR */
4292 	/* write the register state for the compute dispatch */
4293 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4294 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4295 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4296 								- PACKET3_SET_SH_REG_START;
4297 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4298 	}
4299 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4300 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4301 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4302 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4303 							- PACKET3_SET_SH_REG_START;
4304 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4305 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4306 
4307 	/* write dispatch packet */
4308 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4309 	ib.ptr[ib.length_dw++] = 128; /* x */
4310 	ib.ptr[ib.length_dw++] = 1; /* y */
4311 	ib.ptr[ib.length_dw++] = 1; /* z */
4312 	ib.ptr[ib.length_dw++] =
4313 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4314 
4315 	/* write CS partial flush packet */
4316 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4317 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4318 
4319 	/* SGPR */
4320 	/* write the register state for the compute dispatch */
4321 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4322 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4323 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4324 								- PACKET3_SET_SH_REG_START;
4325 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4326 	}
4327 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4328 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4329 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4330 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4331 							- PACKET3_SET_SH_REG_START;
4332 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4333 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4334 
4335 	/* write dispatch packet */
4336 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4337 	ib.ptr[ib.length_dw++] = 128; /* x */
4338 	ib.ptr[ib.length_dw++] = 1; /* y */
4339 	ib.ptr[ib.length_dw++] = 1; /* z */
4340 	ib.ptr[ib.length_dw++] =
4341 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4342 
4343 	/* write CS partial flush packet */
4344 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4345 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4346 
4347 	/* schedule the ib on the ring */
4348 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4349 	if (r) {
4350 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4351 		goto fail;
4352 	}
4353 
4354 	/* wait for the GPU to finish processing the IB */
4355 	r = dma_fence_wait(f, false);
4356 	if (r) {
4357 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4358 		goto fail;
4359 	}
4360 
4361 	/* read back registers to clear the counters */
4362 	mutex_lock(&adev->grbm_idx_mutex);
4363 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4364 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4365 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4366 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4367 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4368 			}
4369 		}
4370 	}
4371 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4372 	mutex_unlock(&adev->grbm_idx_mutex);
4373 
4374 fail:
4375 	amdgpu_ib_free(adev, &ib, NULL);
4376 	dma_fence_put(f);
4377 
4378 	return r;
4379 }
4380 
4381 static int gfx_v9_0_early_init(void *handle)
4382 {
4383 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4384 
4385 	if (adev->asic_type == CHIP_ARCTURUS)
4386 		adev->gfx.num_gfx_rings = 0;
4387 	else
4388 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4389 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4390 	gfx_v9_0_set_ring_funcs(adev);
4391 	gfx_v9_0_set_irq_funcs(adev);
4392 	gfx_v9_0_set_gds_init(adev);
4393 	gfx_v9_0_set_rlc_funcs(adev);
4394 
4395 	return 0;
4396 }
4397 
4398 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4399 		struct ras_err_data *err_data,
4400 		struct amdgpu_iv_entry *entry);
4401 
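/*
 * GFX RAS/ECC late init: run the GDS and GPR EDC work-arounds (they need the
 * IB pool, hence late init), enable the GFX RAS feature, register the RAS
 * interrupt handler plus the debugfs/sysfs nodes, and enable the CP ECC error
 * interrupt.  On the resume path the feature and the interrupt are re-enabled
 * without recreating the handler or the sysfs/debugfs nodes.
 */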
4402 static int gfx_v9_0_ecc_late_init(void *handle)
4403 {
4404 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4405 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4406 	struct ras_ih_if ih_info = {
4407 		.cb = gfx_v9_0_process_ras_data_cb,
4408 	};
4409 	struct ras_fs_if fs_info = {
4410 		.sysfs_name = "gfx_err_count",
4411 		.debugfs_name = "gfx_err_inject",
4412 	};
4413 	struct ras_common_if ras_block = {
4414 		.block = AMDGPU_RAS_BLOCK__GFX,
4415 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4416 		.sub_block_index = 0,
4417 		.name = "gfx",
4418 	};
4419 	int r;
4420 
4421 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4422 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4423 		return 0;
4424 	}
4425 
4426 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4427 	if (r)
4428 		return r;
4429 
4430 	/* requires IBs so do in late init after IB pool is initialized */
4431 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4432 	if (r)
4433 		return r;
4434 
4435 	/* handle resume path. */
4436 	if (*ras_if) {
4437 		/* resend ras TA enable cmd during resume.
4438 		 * prepare to handle failure.
4439 		 */
4440 		ih_info.head = **ras_if;
4441 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4442 		if (r) {
4443 			if (r == -EAGAIN) {
4444 				/* request a gpu reset. will run again. */
4445 				amdgpu_ras_request_reset_on_boot(adev,
4446 						AMDGPU_RAS_BLOCK__GFX);
4447 				return 0;
4448 			}
4449 			/* failed to enable ras, clean up everything. */
4450 			goto irq;
4451 		}
4452 		/* enabled successfully. continue. */
4453 		goto resume;
4454 	}
4455 
4456 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4457 	if (!*ras_if)
4458 		return -ENOMEM;
4459 
4460 	**ras_if = ras_block;
4461 
4462 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4463 	if (r) {
4464 		if (r == -EAGAIN) {
4465 			amdgpu_ras_request_reset_on_boot(adev,
4466 					AMDGPU_RAS_BLOCK__GFX);
4467 			r = 0;
4468 		}
4469 		goto feature;
4470 	}
4471 
4472 	ih_info.head = **ras_if;
4473 	fs_info.head = **ras_if;
4474 
4475 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4476 	if (r)
4477 		goto interrupt;
4478 
4479 	amdgpu_ras_debugfs_create(adev, &fs_info);
4480 
4481 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4482 	if (r)
4483 		goto sysfs;
4484 resume:
4485 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4486 	if (r)
4487 		goto irq;
4488 
4489 	return 0;
4490 irq:
4491 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4492 sysfs:
4493 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4494 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4495 interrupt:
4496 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4497 feature:
4498 	kfree(*ras_if);
4499 	*ras_if = NULL;
4500 	return r;
4501 }
4502 
4503 static int gfx_v9_0_late_init(void *handle)
4504 {
4505 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4506 	int r;
4507 
4508 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4509 	if (r)
4510 		return r;
4511 
4512 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4513 	if (r)
4514 		return r;
4515 
4516 	r = gfx_v9_0_ecc_late_init(handle);
4517 	if (r)
4518 		return r;
4519 
4520 	return 0;
4521 }
4522 
4523 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4524 {
4525 	uint32_t rlc_setting;
4526 
4527 	/* if RLC is not enabled, do nothing */
4528 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4529 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4530 		return false;
4531 
4532 	return true;
4533 }
4534 
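/*
 * Request RLC safe mode: write CMD together with MESSAGE=1 into
 * mmRLC_SAFE_MODE, then poll until the RLC firmware clears the CMD field,
 * giving up after adev->usec_timeout microseconds.
 */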
4535 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4536 {
4537 	uint32_t data;
4538 	unsigned i;
4539 
4540 	data = RLC_SAFE_MODE__CMD_MASK;
4541 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4542 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4543 
4544 	/* wait for RLC_SAFE_MODE */
4545 	for (i = 0; i < adev->usec_timeout; i++) {
4546 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4547 			break;
4548 		udelay(1);
4549 	}
4550 }
4551 
4552 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4553 {
4554 	uint32_t data;
4555 
4556 	data = RLC_SAFE_MODE__CMD_MASK;
4557 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4558 }
4559 
4560 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4561 						bool enable)
4562 {
4563 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4564 
4565 	if (is_support_sw_smu(adev) && !enable)
4566 		smu_set_gfx_cgpg(&adev->smu, enable);
4567 
4568 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4569 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4570 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4571 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4572 	} else {
4573 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4574 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4575 	}
4576 
4577 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4578 }
4579 
4580 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4581 						bool enable)
4582 {
4583 	/* TODO: double check if we need to perform under safe mode */
4584 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4585 
4586 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4587 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4588 	else
4589 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4590 
4591 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4592 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4593 	else
4594 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4595 
4596 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4597 }
4598 
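/*
 * MGCG/MGLS: when enabling, clear the relevant override bits in
 * mmRLC_CGTT_MGCG_OVERRIDE and, if MGLS is supported, turn on RLC and CP
 * memory light sleep; when disabling, set the overrides back and clear the
 * light-sleep enables.  The whole sequence runs under RLC safe mode.
 */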
4599 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4600 						      bool enable)
4601 {
4602 	uint32_t data, def;
4603 
4604 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4605 
4606 	/* It is disabled by HW by default */
4607 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4608 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4609 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4610 
4611 		if (adev->asic_type != CHIP_VEGA12)
4612 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4613 
4614 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4615 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4616 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4617 
4618 		/* only for Vega10 & Raven1 */
4619 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4620 
4621 		if (def != data)
4622 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4623 
4624 		/* MGLS is a global flag to control all MGLS in GFX */
4625 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4626 			/* 2 - RLC memory Light sleep */
4627 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4628 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4629 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4630 				if (def != data)
4631 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4632 			}
4633 			/* 3 - CP memory Light sleep */
4634 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4635 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4636 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4637 				if (def != data)
4638 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4639 			}
4640 		}
4641 	} else {
4642 		/* 1 - MGCG_OVERRIDE */
4643 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4644 
4645 		if (adev->asic_type != CHIP_VEGA12)
4646 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4647 
4648 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4649 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4650 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4651 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4652 
4653 		if (def != data)
4654 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4655 
4656 		/* 2 - disable MGLS in RLC */
4657 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4658 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4659 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4660 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4661 		}
4662 
4663 		/* 3 - disable MGLS in CP */
4664 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4665 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4666 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4667 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4668 		}
4669 	}
4670 
4671 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4672 }
4673 
4674 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4675 					   bool enable)
4676 {
4677 	uint32_t data, def;
4678 
4679 	if (adev->asic_type == CHIP_ARCTURUS)
4680 		return;
4681 
4682 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4683 
4684 	/* Enable 3D CGCG/CGLS */
4685 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4686 		/* write cmd to clear the cgcg/cgls override */
4687 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4688 		/* unset CGCG override */
4689 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4690 		/* update CGCG and CGLS override bits */
4691 		if (def != data)
4692 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4693 
4694 		/* enable 3Dcgcg FSM(0x0000363f) */
4695 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4696 
4697 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4698 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4699 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4700 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4701 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4702 		if (def != data)
4703 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4704 
4705 		/* set IDLE_POLL_COUNT(0x00900100) */
4706 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4707 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4708 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4709 		if (def != data)
4710 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4711 	} else {
4712 		/* Disable CGCG/CGLS */
4713 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4714 		/* disable cgcg, cgls should be disabled */
4715 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4716 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4717 		/* disable cgcg and cgls in FSM */
4718 		if (def != data)
4719 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4720 	}
4721 
4722 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4723 }
4724 
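/*
 * CGCG/CGLS: program the override bits in mmRLC_CGTT_MGCG_OVERRIDE, then set
 * up the CGCG FSM in mmRLC_CGCG_CGLS_CTRL (idle threshold 0x2000 on Arcturus,
 * 0x36 elsewhere) and the idle poll count in mmCP_RB_WPTR_POLL_CNTL; on
 * disable just clear the CGCG/CGLS enable bits in the FSM.
 */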
4725 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4726 						      bool enable)
4727 {
4728 	uint32_t def, data;
4729 
4730 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4731 
4732 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4733 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4734 		/* unset CGCG override */
4735 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4736 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4737 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4738 		else
4739 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4740 		/* update CGCG and CGLS override bits */
4741 		if (def != data)
4742 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4743 
4744 		/* enable cgcg FSM(0x0000363F) */
4745 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4746 
4747 		if (adev->asic_type == CHIP_ARCTURUS)
4748 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4749 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4750 		else
4751 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4752 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4753 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4754 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4755 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4756 		if (def != data)
4757 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4758 
4759 		/* set IDLE_POLL_COUNT(0x00900100) */
4760 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4761 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4762 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4763 		if (def != data)
4764 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4765 	} else {
4766 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4767 		/* reset CGCG/CGLS bits */
4768 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4769 		/* disable cgcg and cgls in FSM */
4770 		if (def != data)
4771 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4772 	}
4773 
4774 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4775 }
4776 
4777 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4778 					    bool enable)
4779 {
4780 	if (enable) {
4781 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4782 		 * ===  MGCG + MGLS ===
4783 		 */
4784 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4785 		/* ===  CGCG /CGLS for GFX 3D Only === */
4786 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4787 		/* ===  CGCG + CGLS === */
4788 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4789 	} else {
4790 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4791 		 * ===  CGCG + CGLS ===
4792 		 */
4793 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4794 		/* ===  CGCG /CGLS for GFX 3D Only === */
4795 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4796 		/* ===  MGCG + MGLS === */
4797 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4798 	}
4799 	return 0;
4800 }
4801 
4802 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4803 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4804 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4805 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4806 	.init = gfx_v9_0_rlc_init,
4807 	.get_csb_size = gfx_v9_0_get_csb_size,
4808 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4809 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4810 	.resume = gfx_v9_0_rlc_resume,
4811 	.stop = gfx_v9_0_rlc_stop,
4812 	.reset = gfx_v9_0_rlc_reset,
4813 	.start = gfx_v9_0_rlc_start
4814 };
4815 
4816 static int gfx_v9_0_set_powergating_state(void *handle,
4817 					  enum amd_powergating_state state)
4818 {
4819 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4820 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4821 
4822 	switch (adev->asic_type) {
4823 	case CHIP_RAVEN:
4824 	case CHIP_RENOIR:
4825 		if (!enable) {
4826 			amdgpu_gfx_off_ctrl(adev, false);
4827 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4828 		}
4829 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4830 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4831 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4832 		} else {
4833 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4834 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4835 		}
4836 
4837 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4838 			gfx_v9_0_enable_cp_power_gating(adev, true);
4839 		else
4840 			gfx_v9_0_enable_cp_power_gating(adev, false);
4841 
4842 		/* update gfx cgpg state */
4843 		if (is_support_sw_smu(adev) && enable)
4844 			smu_set_gfx_cgpg(&adev->smu, enable);
4845 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4846 
4847 		/* update mgcg state */
4848 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4849 
4850 		if (enable)
4851 			amdgpu_gfx_off_ctrl(adev, true);
4852 		break;
4853 	case CHIP_VEGA12:
4854 		if (!enable) {
4855 			amdgpu_gfx_off_ctrl(adev, false);
4856 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4857 		} else {
4858 			amdgpu_gfx_off_ctrl(adev, true);
4859 		}
4860 		break;
4861 	default:
4862 		break;
4863 	}
4864 
4865 	return 0;
4866 }
4867 
4868 static int gfx_v9_0_set_clockgating_state(void *handle,
4869 					  enum amd_clockgating_state state)
4870 {
4871 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4872 
4873 	if (amdgpu_sriov_vf(adev))
4874 		return 0;
4875 
4876 	switch (adev->asic_type) {
4877 	case CHIP_VEGA10:
4878 	case CHIP_VEGA12:
4879 	case CHIP_VEGA20:
4880 	case CHIP_RAVEN:
4881 	case CHIP_ARCTURUS:
4882 	case CHIP_RENOIR:
4883 		gfx_v9_0_update_gfx_clock_gating(adev,
4884 						 state == AMD_CG_STATE_GATE ? true : false);
4885 		break;
4886 	default:
4887 		break;
4888 	}
4889 	return 0;
4890 }
4891 
4892 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4893 {
4894 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4895 	int data;
4896 
4897 	if (amdgpu_sriov_vf(adev))
4898 		*flags = 0;
4899 
4900 	/* AMD_CG_SUPPORT_GFX_MGCG */
4901 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4902 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4903 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4904 
4905 	/* AMD_CG_SUPPORT_GFX_CGCG */
4906 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4907 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4908 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4909 
4910 	/* AMD_CG_SUPPORT_GFX_CGLS */
4911 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4912 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4913 
4914 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4915 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4916 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4917 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4918 
4919 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4920 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4921 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4922 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4923 
4924 	if (adev->asic_type != CHIP_ARCTURUS) {
4925 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4926 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4927 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4928 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4929 
4930 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4931 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4932 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4933 	}
4934 }
4935 
4936 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4937 {
4938 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4939 }
4940 
4941 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4942 {
4943 	struct amdgpu_device *adev = ring->adev;
4944 	u64 wptr;
4945 
4946 	/* XXX check if swapping is necessary on BE */
4947 	if (ring->use_doorbell) {
4948 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4949 	} else {
4950 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4951 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4952 	}
4953 
4954 	return wptr;
4955 }
4956 
4957 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4958 {
4959 	struct amdgpu_device *adev = ring->adev;
4960 
4961 	if (ring->use_doorbell) {
4962 		/* XXX check if swapping is necessary on BE */
4963 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4964 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4965 	} else {
4966 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4967 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4968 	}
4969 }
4970 
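/*
 * HDP flush on the ring: pick the per-engine ref/mask bit from the NBIO HDP
 * flush registers (cp2/cp6 shifted by the pipe for MEC1/MEC2, cp0 for gfx)
 * and emit a write-then-wait on the HDP flush request/done register pair,
 * using that bit as both reference and mask.
 */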
4971 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4972 {
4973 	struct amdgpu_device *adev = ring->adev;
4974 	u32 ref_and_mask, reg_mem_engine;
4975 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4976 
4977 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4978 		switch (ring->me) {
4979 		case 1:
4980 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4981 			break;
4982 		case 2:
4983 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4984 			break;
4985 		default:
4986 			return;
4987 		}
4988 		reg_mem_engine = 0;
4989 	} else {
4990 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4991 		reg_mem_engine = 1; /* pfp */
4992 	}
4993 
4994 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4995 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4996 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4997 			      ref_and_mask, ref_and_mask, 0x20);
4998 }
4999 
5000 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5001 					struct amdgpu_job *job,
5002 					struct amdgpu_ib *ib,
5003 					uint32_t flags)
5004 {
5005 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5006 	u32 header, control = 0;
5007 
5008 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5009 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5010 	else
5011 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5012 
5013 	control |= ib->length_dw | (vmid << 24);
5014 
5015 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5016 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5017 
5018 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5019 			gfx_v9_0_ring_emit_de_meta(ring);
5020 	}
5021 
5022 	amdgpu_ring_write(ring, header);
5023 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5024 	amdgpu_ring_write(ring,
5025 #ifdef __BIG_ENDIAN
5026 		(2 << 0) |
5027 #endif
5028 		lower_32_bits(ib->gpu_addr));
5029 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5030 	amdgpu_ring_write(ring, control);
5031 }
5032 
5033 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5034 					  struct amdgpu_job *job,
5035 					  struct amdgpu_ib *ib,
5036 					  uint32_t flags)
5037 {
5038 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5039 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5040 
5041 	/* Currently, there is a high probability of getting a wave ID mismatch
5042 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5043 	 * different wave IDs than the GDS expects. This situation happens
5044 	 * randomly when at least 5 compute pipes use GDS ordered append.
5045 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5046 	 * Those are probably bugs somewhere else in the kernel driver.
5047 	 *
5048 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5049 	 * GDS to 0 for this ring (me/pipe).
5050 	 */
5051 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5052 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5053 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5054 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5055 	}
5056 
5057 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5058 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5059 	amdgpu_ring_write(ring,
5060 #ifdef __BIG_ENDIAN
5061 				(2 << 0) |
5062 #endif
5063 				lower_32_bits(ib->gpu_addr));
5064 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5065 	amdgpu_ring_write(ring, control);
5066 }
5067 
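/*
 * Fence emission: a RELEASE_MEM packet flushes/writes back the TC caches
 * (write-back only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), writes the
 * 32- or 64-bit sequence number to "addr" and optionally raises an
 * interrupt, depending on the fence flags.
 */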
5068 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5069 				     u64 seq, unsigned flags)
5070 {
5071 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5072 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5073 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5074 
5075 	/* RELEASE_MEM - flush caches, send int */
5076 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5077 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5078 					       EOP_TC_NC_ACTION_EN) :
5079 					      (EOP_TCL1_ACTION_EN |
5080 					       EOP_TC_ACTION_EN |
5081 					       EOP_TC_WB_ACTION_EN |
5082 					       EOP_TC_MD_ACTION_EN)) |
5083 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5084 				 EVENT_INDEX(5)));
5085 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5086 
5087 	/*
5088 	 * the address should be Qword aligned for a 64-bit write, and Dword
5089 	 * aligned if we only send the low 32 bits (data high is discarded)
5090 	 */
5091 	if (write64bit)
5092 		BUG_ON(addr & 0x7);
5093 	else
5094 		BUG_ON(addr & 0x3);
5095 	amdgpu_ring_write(ring, lower_32_bits(addr));
5096 	amdgpu_ring_write(ring, upper_32_bits(addr));
5097 	amdgpu_ring_write(ring, lower_32_bits(seq));
5098 	amdgpu_ring_write(ring, upper_32_bits(seq));
5099 	amdgpu_ring_write(ring, 0);
5100 }
5101 
5102 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5103 {
5104 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5105 	uint32_t seq = ring->fence_drv.sync_seq;
5106 	uint64_t addr = ring->fence_drv.gpu_addr;
5107 
5108 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5109 			      lower_32_bits(addr), upper_32_bits(addr),
5110 			      seq, 0xffffffff, 4);
5111 }
5112 
5113 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5114 					unsigned vmid, uint64_t pd_addr)
5115 {
5116 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5117 
5118 	/* compute doesn't have PFP */
5119 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5120 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5121 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5122 		amdgpu_ring_write(ring, 0x0);
5123 	}
5124 }
5125 
5126 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5127 {
5128 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5129 }
5130 
5131 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5132 {
5133 	u64 wptr;
5134 
5135 	/* XXX check if swapping is necessary on BE */
5136 	if (ring->use_doorbell)
5137 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5138 	else
5139 		BUG();
5140 	return wptr;
5141 }
5142 
5143 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5144 					   bool acquire)
5145 {
5146 	struct amdgpu_device *adev = ring->adev;
5147 	int pipe_num, tmp, reg;
5148 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5149 
5150 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5151 
5152 	/* first me only has 2 entries, GFX and HP3D */
5153 	if (ring->me > 0)
5154 		pipe_num -= 2;
5155 
5156 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5157 	tmp = RREG32(reg);
5158 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5159 	WREG32(reg, tmp);
5160 }
5161 
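/*
 * Pipe reservation for high-priority compute: track reservations in
 * pipe_reserve_bitmap under pipe_reserve_mutex.  When the bitmap becomes
 * empty every gfx and compute ring gets full SPI_WCL_PIPE_PERCENT again,
 * otherwise pipes without a reservation are throttled to the minimum.
 */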
5162 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5163 					    struct amdgpu_ring *ring,
5164 					    bool acquire)
5165 {
5166 	int i, pipe;
5167 	bool reserve;
5168 	struct amdgpu_ring *iring;
5169 
5170 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5171 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5172 	if (acquire)
5173 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5174 	else
5175 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5176 
5177 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5178 		/* Clear all reservations - everyone reacquires all resources */
5179 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5180 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5181 						       true);
5182 
5183 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5184 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5185 						       true);
5186 	} else {
5187 		/* Lower all pipes without a current reservation */
5188 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5189 			iring = &adev->gfx.gfx_ring[i];
5190 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5191 							   iring->me,
5192 							   iring->pipe,
5193 							   0);
5194 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5195 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5196 		}
5197 
5198 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5199 			iring = &adev->gfx.compute_ring[i];
5200 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5201 							   iring->me,
5202 							   iring->pipe,
5203 							   0);
5204 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5205 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5206 		}
5207 	}
5208 
5209 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5210 }
5211 
5212 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5213 				      struct amdgpu_ring *ring,
5214 				      bool acquire)
5215 {
5216 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5217 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5218 
5219 	mutex_lock(&adev->srbm_mutex);
5220 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5221 
5222 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5223 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5224 
5225 	soc15_grbm_select(adev, 0, 0, 0, 0);
5226 	mutex_unlock(&adev->srbm_mutex);
5227 }
5228 
5229 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5230 					       enum drm_sched_priority priority)
5231 {
5232 	struct amdgpu_device *adev = ring->adev;
5233 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5234 
5235 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5236 		return;
5237 
5238 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5239 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5240 }
5241 
5242 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5243 {
5244 	struct amdgpu_device *adev = ring->adev;
5245 
5246 	/* XXX check if swapping is necessary on BE */
5247 	if (ring->use_doorbell) {
5248 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5249 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5250 	} else{
5251 		BUG(); /* only DOORBELL method supported on gfx9 now */
5252 	}
5253 }
5254 
5255 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5256 					 u64 seq, unsigned int flags)
5257 {
5258 	struct amdgpu_device *adev = ring->adev;
5259 
5260 	/* we only allocate 32bit for each seq wb address */
5261 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5262 
5263 	/* write fence seq to the "addr" */
5264 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5265 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5266 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5267 	amdgpu_ring_write(ring, lower_32_bits(addr));
5268 	amdgpu_ring_write(ring, upper_32_bits(addr));
5269 	amdgpu_ring_write(ring, lower_32_bits(seq));
5270 
5271 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5272 		/* set register to trigger INT */
5273 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5274 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5275 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5276 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5277 		amdgpu_ring_write(ring, 0);
5278 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5279 	}
5280 }
5281 
5282 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5283 {
5284 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5285 	amdgpu_ring_write(ring, 0);
5286 }
5287 
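/*
 * CE/DE metadata: the two helpers below use WRITE_DATA to dump a zeroed
 * v9_ce_ib_state / v9_de_ib_state payload into the CSA at
 * csa_vaddr + offsetof(struct v9_gfx_meta_data, ...); the DE variant also
 * points the GDS backup address at csa_vaddr + 4096.
 */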
5288 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5289 {
5290 	struct v9_ce_ib_state ce_payload = {0};
5291 	uint64_t csa_addr;
5292 	int cnt;
5293 
5294 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5295 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5296 
5297 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5298 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5299 				 WRITE_DATA_DST_SEL(8) |
5300 				 WR_CONFIRM) |
5301 				 WRITE_DATA_CACHE_POLICY(0));
5302 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5303 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5304 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5305 }
5306 
5307 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5308 {
5309 	struct v9_de_ib_state de_payload = {0};
5310 	uint64_t csa_addr, gds_addr;
5311 	int cnt;
5312 
5313 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5314 	gds_addr = csa_addr + 4096;
5315 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5316 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5317 
5318 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5319 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5320 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5321 				 WRITE_DATA_DST_SEL(8) |
5322 				 WR_CONFIRM) |
5323 				 WRITE_DATA_CACHE_POLICY(0));
5324 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5325 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5326 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5327 }
5328 
5329 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5330 {
5331 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5332 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5333 }
5334 
5335 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5336 {
5337 	uint32_t dw2 = 0;
5338 
5339 	if (amdgpu_sriov_vf(ring->adev))
5340 		gfx_v9_0_ring_emit_ce_meta(ring);
5341 
5342 	gfx_v9_0_ring_emit_tmz(ring, true);
5343 
5344 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5345 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5346 		/* set load_global_config & load_global_uconfig */
5347 		dw2 |= 0x8001;
5348 		/* set load_cs_sh_regs */
5349 		dw2 |= 0x01000000;
5350 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5351 		dw2 |= 0x10002;
5352 
5353 		/* set load_ce_ram if a preamble is present */
5354 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5355 			dw2 |= 0x10000000;
5356 	} else {
5357 		/* still load_ce_ram if this is the first time a preamble is
5358 		 * presented, even though no context switch happens.
5359 		 */
5360 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5361 			dw2 |= 0x10000000;
5362 	}
5363 
5364 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5365 	amdgpu_ring_write(ring, dw2);
5366 	amdgpu_ring_write(ring, 0);
5367 }
5368 
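/*
 * Conditional execution: init_cond_exec emits a COND_EXEC packet that skips
 * the following DWs when *cond_exe_gpu_addr == 0, writes a dummy DW count
 * (0x55aa55aa) and returns its ring offset; patch_cond_exec later overwrites
 * that dummy with the number of DWs actually emitted, handling ring
 * wrap-around.
 */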
5369 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5370 {
5371 	unsigned ret;
5372 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5373 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5374 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5375 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5376 	ret = ring->wptr & ring->buf_mask;
5377 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5378 	return ret;
5379 }
5380 
5381 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5382 {
5383 	unsigned cur;
5384 	BUG_ON(offset > ring->buf_mask);
5385 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5386 
5387 	cur = (ring->wptr & ring->buf_mask) - 1;
5388 	if (likely(cur > offset))
5389 		ring->ring[offset] = cur - offset;
5390 	else
5391 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5392 }
5393 
5394 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5395 {
5396 	struct amdgpu_device *adev = ring->adev;
5397 
5398 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5399 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5400 				(5 << 8) |	/* dst: memory */
5401 				(1 << 20));	/* write confirm */
5402 	amdgpu_ring_write(ring, reg);
5403 	amdgpu_ring_write(ring, 0);
5404 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5405 				adev->virt.reg_val_offs * 4));
5406 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5407 				adev->virt.reg_val_offs * 4));
5408 }
5409 
5410 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5411 				    uint32_t val)
5412 {
5413 	uint32_t cmd = 0;
5414 
5415 	switch (ring->funcs->type) {
5416 	case AMDGPU_RING_TYPE_GFX:
5417 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5418 		break;
5419 	case AMDGPU_RING_TYPE_KIQ:
5420 		cmd = (1 << 16); /* no inc addr */
5421 		break;
5422 	default:
5423 		cmd = WR_CONFIRM;
5424 		break;
5425 	}
5426 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5427 	amdgpu_ring_write(ring, cmd);
5428 	amdgpu_ring_write(ring, reg);
5429 	amdgpu_ring_write(ring, 0);
5430 	amdgpu_ring_write(ring, val);
5431 }
5432 
5433 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5434 					uint32_t val, uint32_t mask)
5435 {
5436 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5437 }
5438 
5439 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5440 						  uint32_t reg0, uint32_t reg1,
5441 						  uint32_t ref, uint32_t mask)
5442 {
5443 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5444 	struct amdgpu_device *adev = ring->adev;
5445 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5446 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5447 
5448 	if (fw_version_ok)
5449 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5450 				      ref, mask, 0x20);
5451 	else
5452 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5453 							   ref, mask);
5454 }
5455 
5456 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5457 {
5458 	struct amdgpu_device *adev = ring->adev;
5459 	uint32_t value = 0;
5460 
5461 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5462 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5463 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5464 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5465 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5466 }
5467 
5468 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5469 						 enum amdgpu_interrupt_state state)
5470 {
5471 	switch (state) {
5472 	case AMDGPU_IRQ_STATE_DISABLE:
5473 	case AMDGPU_IRQ_STATE_ENABLE:
5474 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5475 			       TIME_STAMP_INT_ENABLE,
5476 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5477 		break;
5478 	default:
5479 		break;
5480 	}
5481 }
5482 
5483 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5484 						     int me, int pipe,
5485 						     enum amdgpu_interrupt_state state)
5486 {
5487 	u32 mec_int_cntl, mec_int_cntl_reg;
5488 
5489 	/*
5490 	 * amdgpu controls only the first MEC. That's why this function only
5491 	 * handles the setting of interrupts for this specific MEC. All other
5492 	 * pipes' interrupts are set by amdkfd.
5493 	 */
5494 
5495 	if (me == 1) {
5496 		switch (pipe) {
5497 		case 0:
5498 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5499 			break;
5500 		case 1:
5501 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5502 			break;
5503 		case 2:
5504 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5505 			break;
5506 		case 3:
5507 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5508 			break;
5509 		default:
5510 			DRM_DEBUG("invalid pipe %d\n", pipe);
5511 			return;
5512 		}
5513 	} else {
5514 		DRM_DEBUG("invalid me %d\n", me);
5515 		return;
5516 	}
5517 
5518 	switch (state) {
5519 	case AMDGPU_IRQ_STATE_DISABLE:
5520 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5521 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5522 					     TIME_STAMP_INT_ENABLE, 0);
5523 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5524 		break;
5525 	case AMDGPU_IRQ_STATE_ENABLE:
5526 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5527 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5528 					     TIME_STAMP_INT_ENABLE, 1);
5529 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5530 		break;
5531 	default:
5532 		break;
5533 	}
5534 }
5535 
5536 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5537 					     struct amdgpu_irq_src *source,
5538 					     unsigned type,
5539 					     enum amdgpu_interrupt_state state)
5540 {
5541 	switch (state) {
5542 	case AMDGPU_IRQ_STATE_DISABLE:
5543 	case AMDGPU_IRQ_STATE_ENABLE:
5544 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5545 			       PRIV_REG_INT_ENABLE,
5546 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5547 		break;
5548 	default:
5549 		break;
5550 	}
5551 
5552 	return 0;
5553 }
5554 
5555 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5556 					      struct amdgpu_irq_src *source,
5557 					      unsigned type,
5558 					      enum amdgpu_interrupt_state state)
5559 {
5560 	switch (state) {
5561 	case AMDGPU_IRQ_STATE_DISABLE:
5562 	case AMDGPU_IRQ_STATE_ENABLE:
5563 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5564 			       PRIV_INSTR_INT_ENABLE,
5565 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5566 	default:
5567 		break;
5568 	}
5569 
5570 	return 0;
5571 }
5572 
5573 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5574 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5575 			CP_ECC_ERROR_INT_ENABLE, 1)
5576 
5577 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5578 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5579 			CP_ECC_ERROR_INT_ENABLE, 0)
5580 
5581 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5582 					      struct amdgpu_irq_src *source,
5583 					      unsigned type,
5584 					      enum amdgpu_interrupt_state state)
5585 {
5586 	switch (state) {
5587 	case AMDGPU_IRQ_STATE_DISABLE:
5588 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5589 				CP_ECC_ERROR_INT_ENABLE, 0);
5590 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5591 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5592 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5593 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5594 		break;
5595 
5596 	case AMDGPU_IRQ_STATE_ENABLE:
5597 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5598 				CP_ECC_ERROR_INT_ENABLE, 1);
5599 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5600 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5601 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5602 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5603 		break;
5604 	default:
5605 		break;
5606 	}
5607 
5608 	return 0;
5609 }
5610 
5611 
5612 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5613 					    struct amdgpu_irq_src *src,
5614 					    unsigned type,
5615 					    enum amdgpu_interrupt_state state)
5616 {
5617 	switch (type) {
5618 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5619 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5620 		break;
5621 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5622 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5623 		break;
5624 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5625 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5626 		break;
5627 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5628 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5629 		break;
5630 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5631 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5632 		break;
5633 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5634 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5635 		break;
5636 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5637 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5638 		break;
5639 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5640 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5641 		break;
5642 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5643 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5644 		break;
5645 	default:
5646 		break;
5647 	}
5648 	return 0;
5649 }
5650 
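/*
 * EOP interrupt: the IV ring_id encodes the pipe in bits [1:0], the ME in
 * bits [3:2] and the queue in bits [6:4].  ME 0 is the gfx ring; for ME 1/2
 * the matching compute ring's fences are processed.
 */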
5651 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5652 			    struct amdgpu_irq_src *source,
5653 			    struct amdgpu_iv_entry *entry)
5654 {
5655 	int i;
5656 	u8 me_id, pipe_id, queue_id;
5657 	struct amdgpu_ring *ring;
5658 
5659 	DRM_DEBUG("IH: CP EOP\n");
5660 	me_id = (entry->ring_id & 0x0c) >> 2;
5661 	pipe_id = (entry->ring_id & 0x03) >> 0;
5662 	queue_id = (entry->ring_id & 0x70) >> 4;
5663 
5664 	switch (me_id) {
5665 	case 0:
5666 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5667 		break;
5668 	case 1:
5669 	case 2:
5670 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5671 			ring = &adev->gfx.compute_ring[i];
5672 			/* Per-queue interrupt is supported for MEC starting from VI.
5673 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5674 			 */
5675 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5676 				amdgpu_fence_process(ring);
5677 		}
5678 		break;
5679 	}
5680 	return 0;
5681 }
5682 
5683 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5684 			   struct amdgpu_iv_entry *entry)
5685 {
5686 	u8 me_id, pipe_id, queue_id;
5687 	struct amdgpu_ring *ring;
5688 	int i;
5689 
5690 	me_id = (entry->ring_id & 0x0c) >> 2;
5691 	pipe_id = (entry->ring_id & 0x03) >> 0;
5692 	queue_id = (entry->ring_id & 0x70) >> 4;
5693 
5694 	switch (me_id) {
5695 	case 0:
5696 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5697 		break;
5698 	case 1:
5699 	case 2:
5700 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5701 			ring = &adev->gfx.compute_ring[i];
5702 			if (ring->me == me_id && ring->pipe == pipe_id &&
5703 			    ring->queue == queue_id)
5704 				drm_sched_fault(&ring->sched);
5705 		}
5706 		break;
5707 	}
5708 }
5709 
5710 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5711 				 struct amdgpu_irq_src *source,
5712 				 struct amdgpu_iv_entry *entry)
5713 {
5714 	DRM_ERROR("Illegal register access in command stream\n");
5715 	gfx_v9_0_fault(adev, entry);
5716 	return 0;
5717 }
5718 
5719 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5720 				  struct amdgpu_irq_src *source,
5721 				  struct amdgpu_iv_entry *entry)
5722 {
5723 	DRM_ERROR("Illegal instruction in command stream\n");
5724 	gfx_v9_0_fault(adev, entry);
5725 	return 0;
5726 }
5727 
5728 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5729 		struct ras_err_data *err_data,
5730 		struct amdgpu_iv_entry *entry)
5731 {
5732 	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5733 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5734 	if (adev->gfx.funcs->query_ras_error_count)
5735 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5736 	amdgpu_ras_reset_gpu(adev, 0);
5737 	return AMDGPU_RAS_SUCCESS;
5738 }
5739 
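/*
 * EDC/ECC counter register table: each entry names one counter register
 * (via SOC15_REG_ENTRY), whether it is instanced per shader engine, how many
 * instances exist, and the field masks used to extract its SEC and DED
 * counts (entries with a zero DED mask only expose a SEC/SED count).
 */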
5740 static const struct {
5741 	const char *name;
5742 	uint32_t ip;
5743 	uint32_t inst;
5744 	uint32_t seg;
5745 	uint32_t reg_offset;
5746 	uint32_t per_se_instance;
5747 	int32_t num_instance;
5748 	uint32_t sec_count_mask;
5749 	uint32_t ded_count_mask;
5750 } gfx_ras_edc_regs[] = {
5751 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5752 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5753 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5754 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5755 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5756 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5757 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5758 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5759 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5760 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5761 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5762 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5763 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5764 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5765 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5766 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5767 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5768 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5769 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5770 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5771 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5772 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5773 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5774 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5775 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5776 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5777 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5778 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5779 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5780 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5781 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5782 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5783 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5784 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5785 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5786 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5787 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5788 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5789 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5790 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5791 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5792 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5793 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5794 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5795 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5796 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5797 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5798 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5799 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5800 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5801 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5802 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5803 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5804 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5805 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5806 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5807 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5808 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5809 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5810 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5811 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5812 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5813 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5814 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5815 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5816 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5817 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5818 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5819 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5820 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5821 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5822 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5823 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5824 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5825 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5826 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5827 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5828 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5829 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5830 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5831 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5832 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5833 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5834 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5835 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5836 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5837 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5838 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5839 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5840 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5841 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5842 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5843 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5845 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5846 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5847 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5848 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5849 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5851 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5852 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5853 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5854 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5855 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5856 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5857 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5858 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5859 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5860 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5861 	  0 },
5862 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5863 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5864 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5865 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5866 	  0 },
5867 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5868 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5869 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5870 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5871 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5872 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5873 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5874 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5875 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5876 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5877 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5878 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5879 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5880 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5881 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5882 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5883 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5884 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5885 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5886 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5887 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5888 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5889 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5890 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5891 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5892 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5893 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5894 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5895 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5896 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5897 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5898 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5899 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5900 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5901 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5902 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5903 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5904 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5905 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5906 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5907 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5908 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5909 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5910 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5911 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5912 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5913 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5914 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5915 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5916 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5917 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5918 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5919 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5920 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5921 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5922 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5923 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5924 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5925 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5926 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5927 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5928 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5929 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5930 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5931 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5932 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5933 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5934 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5935 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5936 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5937 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5938 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5939 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5940 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5941 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5942 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5943 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5944 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5945 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5946 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5947 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5948 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5949 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5950 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5951 	  0 },
5952 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5953 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5954 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5955 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5956 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5957 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5958 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5959 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5960 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5961 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5962 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5963 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5964 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5965 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5966 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5967 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5968 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5969 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5970 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5971 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5972 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5973 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5974 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5975 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5976 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5977 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5978 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5979 	  0 },
5980 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5981 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5982 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5983 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5984 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5985 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5986 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5987 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5988 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5989 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5990 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5991 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5992 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5993 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5994 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5995 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5996 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5997 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5998 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5999 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6000 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6001 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6002 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6003 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6004 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6005 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6006 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6008 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6009 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6010 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6011 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6012 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6013 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6014 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6015 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6016 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6017 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6018 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6019 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6020 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6021 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6022 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6023 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6024 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6025 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6027 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6028 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6029 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6030 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6031 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6033 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6034 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6035 };
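
/*
 * Each gfx_ras_edc_regs entry above pairs a sub-block name with the EDC
 * counter register that reports it, roughly { name, register, per-SE flag,
 * instance count, SEC mask, DED mask }.  gfx_v9_0_query_ras_error_count()
 * below reads each register once per selected SE/instance and applies the
 * two masks to split correctable (SEC/SED) from uncorrectable (DED) errors.
 */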
6036 
6037 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6038 				     void *inject_if)
6039 {
6040 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6041 	int ret;
6042 	struct ta_ras_trigger_error_input block_info = { 0 };
6043 
6044 	if (adev->asic_type != CHIP_VEGA20)
6045 		return -EINVAL;
6046 
6047 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6048 		return -EINVAL;
6049 
6050 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6051 		return -EPERM;
6052 
6053 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6054 	      info->head.type)) {
6055 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6056 			ras_gfx_subblocks[info->head.sub_block_index].name,
6057 			info->head.type);
6058 		return -EPERM;
6059 	}
6060 
6061 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6062 	      info->head.type)) {
6063 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6064 			ras_gfx_subblocks[info->head.sub_block_index].name,
6065 			info->head.type);
6066 		return -EPERM;
6067 	}
6068 
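	/*
	 * Translate the generic RAS request into the TA firmware's block,
	 * sub-block and error-type encoding before handing it to the PSP.
	 */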
6069 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6070 	block_info.sub_block_index =
6071 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6072 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6073 	block_info.address = info->address;
6074 	block_info.value = info->value;
6075 
6076 	mutex_lock(&adev->grbm_idx_mutex);
6077 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6078 	mutex_unlock(&adev->grbm_idx_mutex);
6079 
6080 	return ret;
6081 }
6082 
6083 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6084 					  void *ras_error_status)
6085 {
6086 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6087 	uint32_t sec_count, ded_count;
6088 	uint32_t i;
6089 	uint32_t reg_value;
6090 	uint32_t se_id, instance_id;
6091 
6092 	if (adev->asic_type != CHIP_VEGA20)
6093 		return -EINVAL;
6094 
6095 	err_data->ue_count = 0;
6096 	err_data->ce_count = 0;
6097 
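	/*
	 * Walk SE x instance x counter table.  Counters that are not
	 * per-SE are read only while SE 0 is selected, and instances past
	 * an entry's num_instance are skipped, so each counter location
	 * is read exactly once.
	 */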
6098 	mutex_lock(&adev->grbm_idx_mutex);
6099 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6100 		for (instance_id = 0; instance_id < 256; instance_id++) {
6101 			for (i = 0;
6102 			     i < ARRAY_SIZE(gfx_ras_edc_regs);
6103 			     i++) {
6104 				if (se_id != 0 &&
6105 				    !gfx_ras_edc_regs[i].per_se_instance)
6106 					continue;
6107 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6108 					continue;
6109 
6110 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6111 						      instance_id);
6112 
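				/* Resolve the SOC15 {ip, inst, seg} entry to
				 * an absolute register offset on this device.
				 */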
6113 				reg_value = RREG32(
6114 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6115 							[gfx_ras_edc_regs[i].inst]
6116 							[gfx_ras_edc_regs[i].seg] +
6117 					gfx_ras_edc_regs[i].reg_offset);
6118 				sec_count = reg_value &
6119 					    gfx_ras_edc_regs[i].sec_count_mask;
6120 				ded_count = reg_value &
6121 					    gfx_ras_edc_regs[i].ded_count_mask;
6122 				if (sec_count) {
6123 					DRM_INFO(
6124 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6125 						se_id, instance_id,
6126 						gfx_ras_edc_regs[i].name,
6127 						sec_count);
6128 					err_data->ce_count++;
6129 				}
6130 
6131 				if (ded_count) {
6132 					DRM_INFO(
6133 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6134 						se_id, instance_id,
6135 						gfx_ras_edc_regs[i].name,
6136 						ded_count);
6137 					err_data->ue_count++;
6138 				}
6139 			}
6140 		}
6141 	}
6142 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6143 	mutex_unlock(&adev->grbm_idx_mutex);
6144 
6145 	return 0;
6146 }
6147 
6148 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6149 				  struct amdgpu_irq_src *source,
6150 				  struct amdgpu_iv_entry *entry)
6151 {
6152 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6153 	struct ras_dispatch_if ih_data = {
6154 		.entry = entry,
6155 	};
6156 
6157 	if (!ras_if)
6158 		return 0;
6159 
6160 	ih_data.head = *ras_if;
6161 
6162 	DRM_ERROR("CP ECC ERROR IRQ\n");
6163 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6164 	return 0;
6165 }
6166 
6167 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6168 	.name = "gfx_v9_0",
6169 	.early_init = gfx_v9_0_early_init,
6170 	.late_init = gfx_v9_0_late_init,
6171 	.sw_init = gfx_v9_0_sw_init,
6172 	.sw_fini = gfx_v9_0_sw_fini,
6173 	.hw_init = gfx_v9_0_hw_init,
6174 	.hw_fini = gfx_v9_0_hw_fini,
6175 	.suspend = gfx_v9_0_suspend,
6176 	.resume = gfx_v9_0_resume,
6177 	.is_idle = gfx_v9_0_is_idle,
6178 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6179 	.soft_reset = gfx_v9_0_soft_reset,
6180 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6181 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6182 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6183 };
6184 
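/*
 * The emit_frame_size values below give the worst-case number of ring
 * dwords one submission may emit outside of its IBs; each term is
 * annotated with the packet(s) it accounts for.
 */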
6185 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6186 	.type = AMDGPU_RING_TYPE_GFX,
6187 	.align_mask = 0xff,
6188 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6189 	.support_64bit_ptrs = true,
6190 	.vmhub = AMDGPU_GFXHUB_0,
6191 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6192 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6193 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6194 	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6195 		5 +  /* COND_EXEC */
6196 		7 +  /* PIPELINE_SYNC */
6197 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6198 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6199 		2 + /* VM_FLUSH */
6200 		8 +  /* FENCE for VM_FLUSH */
6201 		20 + /* GDS switch */
6202 		4 + /* double SWITCH_BUFFER,
6203 		       the first COND_EXEC jumps to the place just
6204 		       prior to this double SWITCH_BUFFER */
6205 		5 + /* COND_EXEC */
6206 		7 +  /* HDP_flush */
6207 		4 +  /* VGT_flush */
6208 		14 + /* CE_META */
6209 		31 + /* DE_META */
6210 		3 + /* CNTX_CTRL */
6211 		5 + /* HDP_INVL */
6212 		8 + 8 + /* FENCE x2 */
6213 		2, /* SWITCH_BUFFER */
6214 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6215 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6216 	.emit_fence = gfx_v9_0_ring_emit_fence,
6217 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6218 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6219 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6220 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6221 	.test_ring = gfx_v9_0_ring_test_ring,
6222 	.test_ib = gfx_v9_0_ring_test_ib,
6223 	.insert_nop = amdgpu_ring_insert_nop,
6224 	.pad_ib = amdgpu_ring_generic_pad_ib,
6225 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6226 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6227 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6228 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6229 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6230 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6231 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6232 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6233 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6234 };
6235 
6236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6237 	.type = AMDGPU_RING_TYPE_COMPUTE,
6238 	.align_mask = 0xff,
6239 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6240 	.support_64bit_ptrs = true,
6241 	.vmhub = AMDGPU_GFXHUB_0,
6242 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6243 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6244 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6245 	.emit_frame_size =
6246 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6247 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6248 		5 + /* hdp invalidate */
6249 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6250 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6251 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6252 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6253 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6254 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6255 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6256 	.emit_fence = gfx_v9_0_ring_emit_fence,
6257 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6258 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6259 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6260 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6261 	.test_ring = gfx_v9_0_ring_test_ring,
6262 	.test_ib = gfx_v9_0_ring_test_ib,
6263 	.insert_nop = amdgpu_ring_insert_nop,
6264 	.pad_ib = amdgpu_ring_generic_pad_ib,
6265 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6266 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6267 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6268 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6269 };
6270 
6271 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6272 	.type = AMDGPU_RING_TYPE_KIQ,
6273 	.align_mask = 0xff,
6274 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6275 	.support_64bit_ptrs = true,
6276 	.vmhub = AMDGPU_GFXHUB_0,
6277 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6278 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6279 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6280 	.emit_frame_size =
6281 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6282 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6283 		5 + /* hdp invalidate */
6284 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6285 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6286 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6287 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6288 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6289 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6290 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6291 	.test_ring = gfx_v9_0_ring_test_ring,
6292 	.insert_nop = amdgpu_ring_insert_nop,
6293 	.pad_ib = amdgpu_ring_generic_pad_ib,
6294 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6295 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6296 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6297 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6298 };
6299 
6300 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6301 {
6302 	int i;
6303 
6304 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6305 
6306 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6307 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6308 
6309 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6310 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6311 }
6312 
6313 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6314 	.set = gfx_v9_0_set_eop_interrupt_state,
6315 	.process = gfx_v9_0_eop_irq,
6316 };
6317 
6318 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6319 	.set = gfx_v9_0_set_priv_reg_fault_state,
6320 	.process = gfx_v9_0_priv_reg_irq,
6321 };
6322 
6323 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6324 	.set = gfx_v9_0_set_priv_inst_fault_state,
6325 	.process = gfx_v9_0_priv_inst_irq,
6326 };
6327 
6328 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6329 	.set = gfx_v9_0_set_cp_ecc_error_state,
6330 	.process = gfx_v9_0_cp_ecc_error_irq,
6331 };
6332 
6333 
6334 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6335 {
6336 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6337 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6338 
6339 	adev->gfx.priv_reg_irq.num_types = 1;
6340 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6341 
6342 	adev->gfx.priv_inst_irq.num_types = 1;
6343 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6344 
6345 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6346 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6347 }
6348 
6349 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6350 {
6351 	switch (adev->asic_type) {
6352 	case CHIP_VEGA10:
6353 	case CHIP_VEGA12:
6354 	case CHIP_VEGA20:
6355 	case CHIP_RAVEN:
6356 	case CHIP_ARCTURUS:
6357 	case CHIP_RENOIR:
6358 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6359 		break;
6360 	default:
6361 		break;
6362 	}
6363 }
6364 
6365 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6366 {
6367 	/* init asic gds info */
6368 	switch (adev->asic_type) {
6369 	case CHIP_VEGA10:
6370 	case CHIP_VEGA12:
6371 	case CHIP_VEGA20:
6372 		adev->gds.gds_size = 0x10000;
6373 		break;
6374 	case CHIP_RAVEN:
6375 	case CHIP_ARCTURUS:
6376 		adev->gds.gds_size = 0x1000;
6377 		break;
6378 	default:
6379 		adev->gds.gds_size = 0x10000;
6380 		break;
6381 	}
6382 
6383 	switch (adev->asic_type) {
6384 	case CHIP_VEGA10:
6385 	case CHIP_VEGA20:
6386 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6387 		break;
6388 	case CHIP_VEGA12:
6389 		adev->gds.gds_compute_max_wave_id = 0x27f;
6390 		break;
6391 	case CHIP_RAVEN:
6392 		if (adev->rev_id >= 0x8)
6393 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6394 		else
6395 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6396 		break;
6397 	case CHIP_ARCTURUS:
6398 		adev->gds.gds_compute_max_wave_id = 0xfff;
6399 		break;
6400 	default:
6401 		/* this really depends on the chip */
6402 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6403 		break;
6404 	}
6405 
6406 	adev->gds.gws_size = 64;
6407 	adev->gds.oa_size = 16;
6408 }
6409 
6410 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6411 						 u32 bitmap)
6412 {
6413 	u32 data;
6414 
6415 	if (!bitmap)
6416 		return;
6417 
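	/* Mark the requested CUs inactive in the user shader-array config
	 * of the SE/SH currently selected via gfx_v9_0_select_se_sh().
	 */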
6418 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6419 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6420 
6421 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6422 }
6423 
6424 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6425 {
6426 	u32 data, mask;
6427 
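	/*
	 * A CU is inactive if it is either fused off (CC_GC_...) or disabled
	 * through the user config (GC_USER_...); combine both masks, invert,
	 * and clamp to the number of physical CUs per SH.
	 */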
6428 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6429 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6430 
6431 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6432 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6433 
6434 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6435 
6436 	return (~data) & mask;
6437 }
6438 
6439 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6440 				 struct amdgpu_cu_info *cu_info)
6441 {
6442 	int i, j, k, counter, active_cu_number = 0;
6443 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6444 	unsigned disable_masks[4 * 4];
6445 
6446 	if (!adev || !cu_info)
6447 		return -EINVAL;
6448 
6449 	/*
6450 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6451 	 */
6452 	if (adev->gfx.config.max_shader_engines *
6453 		adev->gfx.config.max_sh_per_se > 16)
6454 		return -EINVAL;
6455 
6456 	amdgpu_gfx_parse_disable_cu(disable_masks,
6457 				    adev->gfx.config.max_shader_engines,
6458 				    adev->gfx.config.max_sh_per_se);
6459 
6460 	mutex_lock(&adev->grbm_idx_mutex);
6461 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6462 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6463 			mask = 1;
6464 			ao_bitmap = 0;
6465 			counter = 0;
6466 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6467 			gfx_v9_0_set_user_cu_inactive_bitmap(
6468 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6469 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6470 
6471 			/*
6472 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6473 			 * is a 4x4 array, which suits the Vega ASICs with their
6474 			 * 4*2 SE/SH layout.
6475 			 * Arcturus, however, uses an 8*1 SE/SH layout.
6476 			 * To minimize the impact, we fold it into the existing
6477 			 * bitmap array as below:
6478 			 *    SE4,SH0 --> bitmap[0][1]
6479 			 *    SE5,SH0 --> bitmap[1][1]
6480 			 *    SE6,SH0 --> bitmap[2][1]
6481 			 *    SE7,SH0 --> bitmap[3][1]
6482 			 */
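			/* e.g. Arcturus SE5/SH0: i = 5, j = 0, so the mask is
			 * stored at bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1],
			 * matching the table above.
			 */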
6483 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6484 
6485 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6486 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6487 					if (counter < adev->gfx.config.max_cu_per_sh)
6488 						ao_bitmap |= mask;
6489 					counter ++;
6490 					counter++;
6491 				mask <<= 1;
6492 			}
6493 			active_cu_number += counter;
6494 			if (i < 2 && j < 2)
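			/*
			 * Only SE0/SE1 and SH0/SH1 are packed into the 32-bit
			 * ao_cu_mask, 8 bits per SH; larger layouts do not fit.
			 */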
6495 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6496 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6497 		}
6498 	}
6499 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6500 	mutex_unlock(&adev->grbm_idx_mutex);
6501 
6502 	cu_info->number = active_cu_number;
6503 	cu_info->ao_cu_mask = ao_cu_mask;
6504 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6505 
6506 	return 0;
6507 }
6508 
6509 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6510 {
6511 	.type = AMD_IP_BLOCK_TYPE_GFX,
6512 	.major = 9,
6513 	.minor = 0,
6514 	.rev = 0,
6515 	.funcs = &gfx_v9_0_ip_funcs,
6516 };
6517