/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

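/*
 * Each table entry maps the driver-visible AMDGPU_RAS_BLOCK__* index to the
 * matching TA_RAS_BLOCK__* value and packs the hw/sw supported error-type
 * flags (arguments a-d and e-h) into the two bitmask fields above.
 */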
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = {	\
		#subblock,	\
		TA_RAS_BLOCK__##subblock,	\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),	\
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);

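/*
 * PM4 packet helpers for the KIQ (kernel interface queue), which the driver
 * uses to set resources and to map, unmap and query compute queues.
 */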
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0 queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

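/* The *_size fields give the number of ring dwords each helper above emits. */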
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* Renoir does not need the common golden settings */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}

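/*
 * Boards on which GFXOFF is known to be unstable are matched below by PCI
 * vendor/device ID, subsystem IDs and revision.
 */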
1158 struct amdgpu_gfxoff_quirk {
1159 u16 chip_vendor;
1160 u16 chip_device;
1161 u16 subsys_vendor;
1162 u16 subsys_device;
1163 u8 revision;
1164 };
1165
1166 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1167 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1168 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1169 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1170 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1171 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1172 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1173 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1174 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1175 /* https://bbs.openkylin.top/t/topic/171497 */
1176 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1177 /* HP 705G4 DM with R5 2400G */
1178 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1179 { 0, 0, 0, 0, 0 },
1180 };
1181
gfx_v9_0_should_disable_gfxoff(struct pci_dev * pdev)1182 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1183 {
1184 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1185
1186 while (p && p->chip_device != 0) {
1187 if (pdev->vendor == p->chip_vendor &&
1188 pdev->device == p->chip_device &&
1189 pdev->subsystem_vendor == p->subsys_vendor &&
1190 pdev->subsystem_device == p->subsys_device &&
1191 pdev->revision == p->revision) {
1192 return true;
1193 }
1194 ++p;
1195 }
1196 return false;
1197 }
1198
is_raven_kicker(struct amdgpu_device * adev)1199 static bool is_raven_kicker(struct amdgpu_device *adev)
1200 {
1201 if (adev->pm.fw_version >= 0x41e2b)
1202 return true;
1203 else
1204 return false;
1205 }
1206
check_if_enlarge_doorbell_range(struct amdgpu_device * adev)1207 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1208 {
1209 if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1210 (adev->gfx.me_fw_version >= 0x000000a5) &&
1211 (adev->gfx.me_feature_version >= 52))
1212 return true;
1213 else
1214 return false;
1215 }
1216
gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device * adev)1217 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1218 {
1219 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1220 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221
1222 switch (adev->ip_versions[GC_HWIP][0]) {
1223 case IP_VERSION(9, 0, 1):
1224 case IP_VERSION(9, 2, 1):
1225 case IP_VERSION(9, 4, 0):
1226 break;
1227 case IP_VERSION(9, 2, 2):
1228 case IP_VERSION(9, 1, 0):
1229 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1230 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1231 ((!is_raven_kicker(adev) &&
1232 adev->gfx.rlc_fw_version < 531) ||
1233 (adev->gfx.rlc_feature_version < 1) ||
1234 !adev->gfx.rlc.is_rlc_v2_1))
1235 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1236
1237 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1238 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1239 AMD_PG_SUPPORT_CP |
1240 AMD_PG_SUPPORT_RLC_SMU_HS;
1241 break;
1242 case IP_VERSION(9, 3, 0):
1243 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1244 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1245 AMD_PG_SUPPORT_CP |
1246 AMD_PG_SUPPORT_RLC_SMU_HS;
1247 break;
1248 default:
1249 break;
1250 }
1251 }
1252
gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device * adev,char * chip_name)1253 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1254 char *chip_name)
1255 {
1256 char fw_name[30];
1257 int err;
1258
1259 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1260 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1261 if (err)
1262 goto out;
1263 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1264
1265 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1266 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1267 if (err)
1268 goto out;
1269 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1270
1271 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1272 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1273 if (err)
1274 goto out;
1275 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1276
1277 out:
1278 if (err) {
1279 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1280 amdgpu_ucode_release(&adev->gfx.me_fw);
1281 amdgpu_ucode_release(&adev->gfx.ce_fw);
1282 }
1283 return err;
1284 }
1285
1286 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1287 char *chip_name)
1288 {
1289 char fw_name[30];
1290 int err;
1291 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1292 uint16_t version_major;
1293 uint16_t version_minor;
1294 uint32_t smu_version;
1295
1296 /*
1297 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1298 * instead of picasso_rlc.bin.
1299 * Detection method:
1300 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1301 * or revision >= 0xD8 && revision <= 0xDF
1302 * otherwise it is PCO FP5
1303 */
1304 if (!strcmp(chip_name, "picasso") &&
1305 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1306 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1307 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1308 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1309 (smu_version >= 0x41e2b))
1310 /*
1311 * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1312 */
1313 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1314 else
1315 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1316 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1317 if (err)
1318 goto out;
1319 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1320
1321 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1322 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1323 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1324 out:
1325 if (err)
1326 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1327
1328 return err;
1329 }
1330
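/*
 * GC 9.4.1, 9.4.2 and 9.3.0 ship a single MEC image, so no separate
 * _mec2.bin is requested for them; the caller instead mirrors the MEC1
 * version fields into the MEC2 ones (see the else branch in
 * gfx_v9_0_init_cp_compute_microcode()).
 */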
1331 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1332 {
1333 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1334 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1335 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1336 return false;
1337
1338 return true;
1339 }
1340
1341 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1342 char *chip_name)
1343 {
1344 char fw_name[30];
1345 int err;
1346
1347 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1348 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1349 else
1350 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1351
1352 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1353 if (err)
1354 goto out;
1355 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1356 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1357
1358 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1359 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1360 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1361 else
1362 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1363
1364 /* ignore failures to load */
1365 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1366 if (!err) {
1367 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1368 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1369 } else {
1370 err = 0;
1371 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1372 }
1373 } else {
1374 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1375 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1376 }
1377
1378 gfx_v9_0_check_if_need_gfxoff(adev);
1379 gfx_v9_0_check_fw_write_wait(adev);
1380
1381 out:
1382 if (err)
1383 amdgpu_ucode_release(&adev->gfx.mec_fw);
1384 return err;
1385 }
1386
1387 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1388 {
1389 char ucode_prefix[30];
1390 int r;
1391
1392 DRM_DEBUG("\n");
1393 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1394
1395 /* No CPG in Arcturus */
1396 if (adev->gfx.num_gfx_rings) {
1397 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1398 if (r)
1399 return r;
1400 }
1401
1402 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1403 if (r)
1404 return r;
1405
1406 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1407 if (r)
1408 return r;
1409
1410 return r;
1411 }
1412
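/*
 * Size of the clear-state buffer in dwords.  Worked example of the
 * accounting below: 2 (PREAMBLE begin) + 3 (CONTEXT_CONTROL) +
 * sum over SECT_CONTEXT extents of (2 + reg_count) + 2 (PREAMBLE end) +
 * 2 (CLEAR_STATE), which is exactly the layout emitted by
 * gfx_v9_0_get_csb_buffer() below.
 */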
1413 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1414 {
1415 u32 count = 0;
1416 const struct cs_section_def *sect = NULL;
1417 const struct cs_extent_def *ext = NULL;
1418
1419 /* begin clear state */
1420 count += 2;
1421 /* context control state */
1422 count += 3;
1423
1424 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1425 for (ext = sect->section; ext->extent != NULL; ++ext) {
1426 if (sect->id == SECT_CONTEXT)
1427 count += 2 + ext->reg_count;
1428 else
1429 return 0;
1430 }
1431 }
1432
1433 /* end clear state */
1434 count += 2;
1435 /* clear state */
1436 count += 2;
1437
1438 return count;
1439 }
1440
1441 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1442 volatile u32 *buffer)
1443 {
1444 u32 count = 0, i;
1445 const struct cs_section_def *sect = NULL;
1446 const struct cs_extent_def *ext = NULL;
1447
1448 if (adev->gfx.rlc.cs_data == NULL)
1449 return;
1450 if (buffer == NULL)
1451 return;
1452
1453 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1454 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1455
1456 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1457 buffer[count++] = cpu_to_le32(0x80000000);
1458 buffer[count++] = cpu_to_le32(0x80000000);
1459
1460 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1461 for (ext = sect->section; ext->extent != NULL; ++ext) {
1462 if (sect->id == SECT_CONTEXT) {
1463 buffer[count++] =
1464 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1465 buffer[count++] = cpu_to_le32(ext->reg_index -
1466 PACKET3_SET_CONTEXT_REG_START);
1467 for (i = 0; i < ext->reg_count; i++)
1468 buffer[count++] = cpu_to_le32(ext->extent[i]);
1469 } else {
1470 return;
1471 }
1472 }
1473 }
1474
1475 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1476 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1477
1478 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1479 buffer[count++] = cpu_to_le32(0);
1480 }
1481
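/*
 * Per SE/SH, mark the first N active CUs as always-on for load balancing:
 * N is 4 on APUs, 8 on GC 9.2.1 and 12 otherwise.  The mask covering the
 * first two of those CUs is also written to RLC_PG_ALWAYS_ON_CU_MASK,
 * presumably to exclude them from per-CU powergating.
 */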
1482 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1483 {
1484 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1485 uint32_t pg_always_on_cu_num = 2;
1486 uint32_t always_on_cu_num;
1487 uint32_t i, j, k;
1488 uint32_t mask, cu_bitmap, counter;
1489
1490 if (adev->flags & AMD_IS_APU)
1491 always_on_cu_num = 4;
1492 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1493 always_on_cu_num = 8;
1494 else
1495 always_on_cu_num = 12;
1496
1497 mutex_lock(&adev->grbm_idx_mutex);
1498 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1499 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1500 mask = 1;
1501 cu_bitmap = 0;
1502 counter = 0;
1503 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1504
1505 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1506 if (cu_info->bitmap[0][i][j] & mask) {
1507 if (counter == pg_always_on_cu_num)
1508 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1509 if (counter < always_on_cu_num)
1510 cu_bitmap |= mask;
1511 else
1512 break;
1513 counter++;
1514 }
1515 mask <<= 1;
1516 }
1517
1518 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1519 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1520 }
1521 }
1522 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1523 mutex_unlock(&adev->grbm_idx_mutex);
1524 }
1525
1526 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1527 {
1528 uint32_t data;
1529
1530 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1531 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1532 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1533 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1534 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1535
1536 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1537 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1538
1539 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1540 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1541
1542 mutex_lock(&adev->grbm_idx_mutex);
1543 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1544 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1545 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1546
1547 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1548 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1549 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1550 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1551 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1552
1553 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1554 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1555 data &= 0x0000FFFF;
1556 data |= 0x00C00000;
1557 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1558
1559 /*
1560 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1561 * programmed in gfx_v9_0_init_always_on_cu_mask()
1562 */
1563
1564 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1565 * but is used here for RLC_LB_CNTL configuration */
1566 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1567 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1568 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1569 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1570 mutex_unlock(&adev->grbm_idx_mutex);
1571
1572 gfx_v9_0_init_always_on_cu_mask(adev);
1573 }
1574
1575 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1576 {
1577 uint32_t data;
1578
1579 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1580 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1581 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1582 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1583 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1584
1585 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1586 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1587
1588 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1589 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1590
1591 mutex_lock(&adev->grbm_idx_mutex);
1592 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1593 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1594 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1595
1596 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1597 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1598 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1599 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1600 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1601
1602 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1603 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1604 data &= 0x0000FFFF;
1605 data |= 0x00C00000;
1606 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1607
1608 /*
1609 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1610 * programmed in gfx_v9_0_init_always_on_cu_mask()
1611 */
1612
1613 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1614 * but is used here for RLC_LB_CNTL configuration */
1615 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1616 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1617 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1618 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1619 mutex_unlock(&adev->grbm_idx_mutex);
1620
1621 gfx_v9_0_init_always_on_cu_mask(adev);
1622 }
1623
1624 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1625 {
1626 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1627 }
1628
1629 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1630 {
1631 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1632 return 5;
1633 else
1634 return 4;
1635 }
1636
1637 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1638 {
1639 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1640
1641 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1642 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1643 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1644 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1645 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1646 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1647 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1648 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1649 adev->gfx.rlc.rlcg_reg_access_supported = true;
1650 }
1651
1652 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1653 {
1654 const struct cs_section_def *cs_data;
1655 int r;
1656
1657 adev->gfx.rlc.cs_data = gfx9_cs_data;
1658
1659 cs_data = adev->gfx.rlc.cs_data;
1660
1661 if (cs_data) {
1662 /* init clear state block */
1663 r = amdgpu_gfx_rlc_init_csb(adev);
1664 if (r)
1665 return r;
1666 }
1667
1668 if (adev->flags & AMD_IS_APU) {
1669 /* TODO: double check the cp_table_size for RV */
1670 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1671 r = amdgpu_gfx_rlc_init_cpt(adev);
1672 if (r)
1673 return r;
1674 }
1675
1676 return 0;
1677 }
1678
1679 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1680 {
1681 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1682 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1683 }
1684
1685 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1686 {
1687 int r;
1688 u32 *hpd;
1689 const __le32 *fw_data;
1690 unsigned fw_size;
1691 u32 *fw;
1692 size_t mec_hpd_size;
1693
1694 const struct gfx_firmware_header_v1_0 *mec_hdr;
1695
1696 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1697
1698 /* take ownership of the relevant compute queues */
1699 amdgpu_gfx_compute_queue_acquire(adev);
1700 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1701 if (mec_hpd_size) {
1702 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1703 AMDGPU_GEM_DOMAIN_VRAM |
1704 AMDGPU_GEM_DOMAIN_GTT,
1705 &adev->gfx.mec.hpd_eop_obj,
1706 &adev->gfx.mec.hpd_eop_gpu_addr,
1707 (void **)&hpd);
1708 if (r) {
1709 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1710 gfx_v9_0_mec_fini(adev);
1711 return r;
1712 }
1713
1714 memset(hpd, 0, mec_hpd_size);
1715
1716 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1717 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1718 }
1719
1720 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1721
1722 fw_data = (const __le32 *)
1723 (adev->gfx.mec_fw->data +
1724 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1725 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1726
1727 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1728 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1729 &adev->gfx.mec.mec_fw_obj,
1730 &adev->gfx.mec.mec_fw_gpu_addr,
1731 (void **)&fw);
1732 if (r) {
1733 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1734 gfx_v9_0_mec_fini(adev);
1735 return r;
1736 }
1737
1738 memcpy(fw, fw_data, fw_size);
1739
1740 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1741 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1742
1743 return 0;
1744 }
1745
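/*
 * Indirect SQ register access: SQ_IND_INDEX selects the wave, SIMD and
 * register index (with FORCE_READ), and the value is returned through
 * SQ_IND_DATA.  wave_read_regs() below additionally sets AUTO_INCR so a
 * run of consecutive registers (SGPRs/VGPRs) can be streamed out.
 */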
1746 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1747 {
1748 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1749 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1750 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1751 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1752 (SQ_IND_INDEX__FORCE_READ_MASK));
1753 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1754 }
1755
1756 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1757 uint32_t wave, uint32_t thread,
1758 uint32_t regno, uint32_t num, uint32_t *out)
1759 {
1760 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1761 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1762 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1763 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1764 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1765 (SQ_IND_INDEX__FORCE_READ_MASK) |
1766 (SQ_IND_INDEX__AUTO_INCR_MASK));
1767 while (num--)
1768 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1769 }
1770
1771 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1772 {
1773 /* type 1 wave data */
1774 dst[(*no_fields)++] = 1;
1775 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1776 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1777 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1778 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1779 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1780 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1781 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1782 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1783 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1784 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1785 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1786 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1787 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1788 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1789 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1790 }
1791
1792 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1793 uint32_t wave, uint32_t start,
1794 uint32_t size, uint32_t *dst)
1795 {
1796 wave_read_regs(
1797 adev, simd, wave, 0,
1798 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1799 }
1800
1801 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1802 uint32_t wave, uint32_t thread,
1803 uint32_t start, uint32_t size,
1804 uint32_t *dst)
1805 {
1806 wave_read_regs(
1807 adev, simd, wave, thread,
1808 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1809 }
1810
1811 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1812 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1813 {
1814 soc15_grbm_select(adev, me, pipe, q, vm, 0);
1815 }
1816
1817 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1818 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1819 .select_se_sh = &gfx_v9_0_select_se_sh,
1820 .read_wave_data = &gfx_v9_0_read_wave_data,
1821 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1822 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1823 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1824 };
1825
1826 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1827 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1828 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1829 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1830 };
1831
1832 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1833 .ras_block = {
1834 .hw_ops = &gfx_v9_0_ras_ops,
1835 },
1836 };
1837
1838 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1839 {
1840 u32 gb_addr_config;
1841 int err;
1842
1843 switch (adev->ip_versions[GC_HWIP][0]) {
1844 case IP_VERSION(9, 0, 1):
1845 adev->gfx.config.max_hw_contexts = 8;
1846 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1847 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1848 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1849 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1850 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1851 break;
1852 case IP_VERSION(9, 2, 1):
1853 adev->gfx.config.max_hw_contexts = 8;
1854 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1858 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1859 DRM_INFO("fix gfx.config for vega12\n");
1860 break;
1861 case IP_VERSION(9, 4, 0):
1862 adev->gfx.ras = &gfx_v9_0_ras;
1863 adev->gfx.config.max_hw_contexts = 8;
1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1868 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1869 gb_addr_config &= ~0xf3e777ff;
1870 gb_addr_config |= 0x22014042;
1871 /* check vbios table if gpu info is not available */
1872 err = amdgpu_atomfirmware_get_gfx_info(adev);
1873 if (err)
1874 return err;
1875 break;
1876 case IP_VERSION(9, 2, 2):
1877 case IP_VERSION(9, 1, 0):
1878 adev->gfx.config.max_hw_contexts = 8;
1879 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1883 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1884 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1885 else
1886 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1887 break;
1888 case IP_VERSION(9, 4, 1):
1889 adev->gfx.ras = &gfx_v9_4_ras;
1890 adev->gfx.config.max_hw_contexts = 8;
1891 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1892 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1893 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1894 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1895 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1896 gb_addr_config &= ~0xf3e777ff;
1897 gb_addr_config |= 0x22014042;
1898 break;
1899 case IP_VERSION(9, 3, 0):
1900 adev->gfx.config.max_hw_contexts = 8;
1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1905 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1906 gb_addr_config &= ~0xf3e777ff;
1907 gb_addr_config |= 0x22010042;
1908 break;
1909 case IP_VERSION(9, 4, 2):
1910 adev->gfx.ras = &gfx_v9_4_2_ras;
1911 adev->gfx.config.max_hw_contexts = 8;
1912 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1913 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1914 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1915 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1916 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1917 gb_addr_config &= ~0xf3e777ff;
1918 gb_addr_config |= 0x22014042;
1919 /* check vbios table if gpu info is not available */
1920 err = amdgpu_atomfirmware_get_gfx_info(adev);
1921 if (err)
1922 return err;
1923 break;
1924 default:
1925 BUG();
1926 break;
1927 }
1928
1929 adev->gfx.config.gb_addr_config = gb_addr_config;
1930
1931 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1932 REG_GET_FIELD(
1933 adev->gfx.config.gb_addr_config,
1934 GB_ADDR_CONFIG,
1935 NUM_PIPES);
1936
1937 adev->gfx.config.max_tile_pipes =
1938 adev->gfx.config.gb_addr_config_fields.num_pipes;
1939
1940 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1941 REG_GET_FIELD(
1942 adev->gfx.config.gb_addr_config,
1943 GB_ADDR_CONFIG,
1944 NUM_BANKS);
1945 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1946 REG_GET_FIELD(
1947 adev->gfx.config.gb_addr_config,
1948 GB_ADDR_CONFIG,
1949 MAX_COMPRESSED_FRAGS);
1950 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1951 REG_GET_FIELD(
1952 adev->gfx.config.gb_addr_config,
1953 GB_ADDR_CONFIG,
1954 NUM_RB_PER_SE);
1955 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1956 REG_GET_FIELD(
1957 adev->gfx.config.gb_addr_config,
1958 GB_ADDR_CONFIG,
1959 NUM_SHADER_ENGINES);
1960 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1961 REG_GET_FIELD(
1962 adev->gfx.config.gb_addr_config,
1963 GB_ADDR_CONFIG,
1964 PIPE_INTERLEAVE_SIZE));
1965
1966 return 0;
1967 }
1968
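/*
 * Initialize one compute ring.  MEC0 is ME1 from the CP's point of view,
 * hence ring->me = mec + 1.  Each ring gets its own doorbell at
 * (mec_ring0 + ring_id) << 1 (the shift presumably accounts for the
 * 64-bit doorbell stride) and a GFX9_MEC_HPD_SIZE slice of the shared
 * EOP buffer allocated in gfx_v9_0_mec_init().
 */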
1969 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1970 int mec, int pipe, int queue)
1971 {
1972 unsigned irq_type;
1973 struct amdgpu_ring *ring;
1974 unsigned int hw_prio;
1975
1976 ring = &adev->gfx.compute_ring[ring_id];
1977
1978 /* mec0 is me1 */
1979 ring->me = mec + 1;
1980 ring->pipe = pipe;
1981 ring->queue = queue;
1982
1983 ring->ring_obj = NULL;
1984 ring->use_doorbell = true;
1985 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1986 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1987 + (ring_id * GFX9_MEC_HPD_SIZE);
1988 ring->vm_hub = AMDGPU_GFXHUB(0);
1989 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1990
1991 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1992 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1993 + ring->pipe;
1994 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1995 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1996 /* type-2 packets are deprecated on MEC, use type-3 instead */
1997 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1998 hw_prio, NULL);
1999 }
2000
2001 static int gfx_v9_0_sw_init(void *handle)
2002 {
2003 int i, j, k, r, ring_id;
2004 struct amdgpu_ring *ring;
2005 struct amdgpu_kiq *kiq;
2006 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2007 unsigned int hw_prio;
2008
2009 switch (adev->ip_versions[GC_HWIP][0]) {
2010 case IP_VERSION(9, 0, 1):
2011 case IP_VERSION(9, 2, 1):
2012 case IP_VERSION(9, 4, 0):
2013 case IP_VERSION(9, 2, 2):
2014 case IP_VERSION(9, 1, 0):
2015 case IP_VERSION(9, 4, 1):
2016 case IP_VERSION(9, 3, 0):
2017 case IP_VERSION(9, 4, 2):
2018 adev->gfx.mec.num_mec = 2;
2019 break;
2020 default:
2021 adev->gfx.mec.num_mec = 1;
2022 break;
2023 }
2024
2025 adev->gfx.mec.num_pipe_per_mec = 4;
2026 adev->gfx.mec.num_queue_per_pipe = 8;
2027
2028 /* EOP Event */
2029 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2030 if (r)
2031 return r;
2032
2033 /* Privileged reg */
2034 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2035 &adev->gfx.priv_reg_irq);
2036 if (r)
2037 return r;
2038
2039 /* Privileged inst */
2040 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2041 &adev->gfx.priv_inst_irq);
2042 if (r)
2043 return r;
2044
2045 /* ECC error */
2046 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2047 &adev->gfx.cp_ecc_error_irq);
2048 if (r)
2049 return r;
2050
2051 /* FUE error */
2052 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2053 &adev->gfx.cp_ecc_error_irq);
2054 if (r)
2055 return r;
2056
2057 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2058
2059 if (adev->gfx.rlc.funcs) {
2060 if (adev->gfx.rlc.funcs->init) {
2061 r = adev->gfx.rlc.funcs->init(adev);
2062 if (r) {
2063 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2064 return r;
2065 }
2066 }
2067 }
2068
2069 r = gfx_v9_0_mec_init(adev);
2070 if (r) {
2071 DRM_ERROR("Failed to init MEC BOs!\n");
2072 return r;
2073 }
2074
2075 /* set up the gfx ring */
2076 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2077 ring = &adev->gfx.gfx_ring[i];
2078 ring->ring_obj = NULL;
2079 if (!i)
2080 sprintf(ring->name, "gfx");
2081 else
2082 sprintf(ring->name, "gfx_%d", i);
2083 ring->use_doorbell = true;
2084 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2085
2086 /* disable scheduler on the real ring */
2087 ring->no_scheduler = true;
2088 ring->vm_hub = AMDGPU_GFXHUB(0);
2089 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2090 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2091 AMDGPU_RING_PRIO_DEFAULT, NULL);
2092 if (r)
2093 return r;
2094 }
2095
2096 /* set up the software rings */
2097 if (adev->gfx.num_gfx_rings) {
2098 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2099 ring = &adev->gfx.sw_gfx_ring[i];
2100 ring->ring_obj = NULL;
2101 sprintf(ring->name, amdgpu_sw_ring_name(i));
2102 ring->use_doorbell = true;
2103 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2104 ring->is_sw_ring = true;
2105 hw_prio = amdgpu_sw_ring_priority(i);
2106 ring->vm_hub = AMDGPU_GFXHUB(0);
2107 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2108 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2109 NULL);
2110 if (r)
2111 return r;
2112 ring->wptr = 0;
2113 }
2114
2115 /* init the muxer and add software rings */
2116 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2117 GFX9_NUM_SW_GFX_RINGS);
2118 if (r) {
2119 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2120 return r;
2121 }
2122 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2123 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2124 &adev->gfx.sw_gfx_ring[i]);
2125 if (r) {
2126 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2127 return r;
2128 }
2129 }
2130 }
2131
2132 /* set up the compute queues - allocate horizontally across pipes */
2133 ring_id = 0;
2134 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2135 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2136 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2137 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2138 k, j))
2139 continue;
2140
2141 r = gfx_v9_0_compute_ring_init(adev,
2142 ring_id,
2143 i, k, j);
2144 if (r)
2145 return r;
2146
2147 ring_id++;
2148 }
2149 }
2150 }
2151
2152 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2153 if (r) {
2154 DRM_ERROR("Failed to init KIQ BOs!\n");
2155 return r;
2156 }
2157
2158 kiq = &adev->gfx.kiq[0];
2159 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
2160 if (r)
2161 return r;
2162
2163 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2164 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2165 if (r)
2166 return r;
2167
2168 adev->gfx.ce_ram_size = 0x8000;
2169
2170 r = gfx_v9_0_gpu_early_init(adev);
2171 if (r)
2172 return r;
2173
2174 if (amdgpu_gfx_ras_sw_init(adev)) {
2175 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2176 return -EINVAL;
2177 }
2178
2179 return 0;
2180 }
2181
2182
2183 static int gfx_v9_0_sw_fini(void *handle)
2184 {
2185 int i;
2186 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2187
2188 if (adev->gfx.num_gfx_rings) {
2189 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2190 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2191 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2192 }
2193
2194 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2195 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2196 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2197 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2198
2199 amdgpu_gfx_mqd_sw_fini(adev, 0);
2200 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2201 amdgpu_gfx_kiq_fini(adev, 0);
2202
2203 gfx_v9_0_mec_fini(adev);
2204 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2205 &adev->gfx.rlc.clear_state_gpu_addr,
2206 (void **)&adev->gfx.rlc.cs_ptr);
2207 if (adev->flags & AMD_IS_APU) {
2208 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2209 &adev->gfx.rlc.cp_table_gpu_addr,
2210 (void **)&adev->gfx.rlc.cp_table_ptr);
2211 }
2212 gfx_v9_0_free_microcode(adev);
2213
2214 return 0;
2215 }
2216
2217
2218 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2219 {
2220 /* TODO */
2221 }
2222
2223 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2224 u32 instance, int xcc_id)
2225 {
2226 u32 data;
2227
2228 if (instance == 0xffffffff)
2229 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2230 else
2231 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2232
2233 if (se_num == 0xffffffff)
2234 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2235 else
2236 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2237
2238 if (sh_num == 0xffffffff)
2239 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2240 else
2241 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2242
2243 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2244 }
2245
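/*
 * Active render-backend bitmap for the currently selected SE/SH: OR the
 * fixed and user RB disable masks, shift the field down, and invert it
 * under a mask sized to max_backends_per_se / max_sh_per_se.
 */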
2246 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2247 {
2248 u32 data, mask;
2249
2250 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2251 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2252
2253 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2254 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2255
2256 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2257 adev->gfx.config.max_sh_per_se);
2258
2259 return (~data) & mask;
2260 }
2261
2262 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2263 {
2264 int i, j;
2265 u32 data;
2266 u32 active_rbs = 0;
2267 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2268 adev->gfx.config.max_sh_per_se;
2269
2270 mutex_lock(&adev->grbm_idx_mutex);
2271 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2272 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2273 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2274 data = gfx_v9_0_get_rb_active_bitmap(adev);
2275 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2276 rb_bitmap_width_per_sh);
2277 }
2278 }
2279 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2280 mutex_unlock(&adev->grbm_idx_mutex);
2281
2282 adev->gfx.config.backend_enable_mask = active_rbs;
2283 adev->gfx.config.num_rbs = hweight32(active_rbs);
2284 }
2285
2286 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2287 uint32_t first_vmid,
2288 uint32_t last_vmid)
2289 {
2290 uint32_t data;
2291 uint32_t trap_config_vmid_mask = 0;
2292 int i;
2293
2294 /* Calculate trap config vmid mask */
2295 for (i = first_vmid; i < last_vmid; i++)
2296 trap_config_vmid_mask |= (1 << i);
2297
2298 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2299 VMID_SEL, trap_config_vmid_mask);
2300 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2301 TRAP_EN, 1);
2302 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2303 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2304
2305 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2306 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2307 }
2308
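/*
 * Static memory apertures for the KFD (compute) VMIDs.  Writing
 * DEFAULT_SH_MEM_BASES into both halves of SH_MEM_BASES places the
 * private and shared apertures of those VMIDs in the fixed 0x6000'...
 * range described in the comment inside gfx_v9_0_init_compute_vmid().
 */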
2309 #define DEFAULT_SH_MEM_BASES (0x6000)
2310 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2311 {
2312 int i;
2313 uint32_t sh_mem_config;
2314 uint32_t sh_mem_bases;
2315
2316 /*
2317 * Configure apertures:
2318 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2319 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2320 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2321 */
2322 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2323
2324 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2325 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2326 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2327
2328 mutex_lock(&adev->srbm_mutex);
2329 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2330 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2331 /* CP and shaders */
2332 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2333 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2334 }
2335 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2336 mutex_unlock(&adev->srbm_mutex);
2337
2338 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2339 access. These should be enabled by FW for target VMIDs. */
2340 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2341 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2342 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2343 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2344 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2345 }
2346 }
2347
2348 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2349 {
2350 int vmid;
2351
2352 /*
2353 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2354 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2355 * the driver can enable them for graphics. VMID0 should maintain
2356 * access so that HWS firmware can save/restore entries.
2357 */
2358 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2359 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2360 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2361 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2362 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2363 }
2364 }
2365
2366 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2367 {
2368 uint32_t tmp;
2369
2370 switch (adev->ip_versions[GC_HWIP][0]) {
2371 case IP_VERSION(9, 4, 1):
2372 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2373 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2374 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2375 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2376 break;
2377 default:
2378 break;
2379 }
2380 }
2381
2382 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2383 {
2384 u32 tmp;
2385 int i;
2386
2387 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2388
2389 gfx_v9_0_tiling_mode_table_init(adev);
2390
2391 if (adev->gfx.num_gfx_rings)
2392 gfx_v9_0_setup_rb(adev);
2393 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2394 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2395
2396 /* XXX SH_MEM regs */
2397 /* where to put LDS, scratch, GPUVM in FSA64 space */
2398 mutex_lock(&adev->srbm_mutex);
2399 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2400 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2401 /* CP and shaders */
2402 if (i == 0) {
2403 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2404 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2405 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2406 !!adev->gmc.noretry);
2407 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2408 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2409 } else {
2410 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2411 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2412 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2413 !!adev->gmc.noretry);
2414 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2415 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2416 (adev->gmc.private_aperture_start >> 48));
2417 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2418 (adev->gmc.shared_aperture_start >> 48));
2419 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2420 }
2421 }
2422 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2423
2424 mutex_unlock(&adev->srbm_mutex);
2425
2426 gfx_v9_0_init_compute_vmid(adev);
2427 gfx_v9_0_init_gds_vmid(adev);
2428 gfx_v9_0_init_sq_config(adev);
2429 }
2430
2431 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2432 {
2433 u32 i, j, k;
2434 u32 mask;
2435
2436 mutex_lock(&adev->grbm_idx_mutex);
2437 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2438 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2439 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2440 for (k = 0; k < adev->usec_timeout; k++) {
2441 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2442 break;
2443 udelay(1);
2444 }
2445 if (k == adev->usec_timeout) {
2446 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2447 0xffffffff, 0xffffffff, 0);
2448 mutex_unlock(&adev->grbm_idx_mutex);
2449 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2450 i, j);
2451 return;
2452 }
2453 }
2454 }
2455 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2456 mutex_unlock(&adev->grbm_idx_mutex);
2457
2458 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2459 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2460 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2461 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2462 for (k = 0; k < adev->usec_timeout; k++) {
2463 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2464 break;
2465 udelay(1);
2466 }
2467 }
2468
2469 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2470 bool enable)
2471 {
2472 u32 tmp;
2473
2474 /* These interrupts should be enabled to drive DS clock */
2475
2476 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2477
2478 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2479 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2480 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2481 if(adev->gfx.num_gfx_rings)
2482 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2483
2484 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2485 }
2486
2487 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2488 {
2489 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2490 /* csib */
2491 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2492 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2493 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2494 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2495 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2496 adev->gfx.rlc.clear_state_size);
2497 }
2498
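/*
 * Scan the RLC register_list_format blob starting at the end of its
 * direct-register section.  The helper records the starting offset of
 * every indirect block (each block is terminated by a 0xFFFFFFFF dword)
 * and collects the distinct indirect registers referenced, up to
 * unique_indirect_reg_count of them.
 */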
2499 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2500 int indirect_offset,
2501 int list_size,
2502 int *unique_indirect_regs,
2503 int unique_indirect_reg_count,
2504 int *indirect_start_offsets,
2505 int *indirect_start_offsets_count,
2506 int max_start_offsets_count)
2507 {
2508 int idx;
2509
2510 for (; indirect_offset < list_size; indirect_offset++) {
2511 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2512 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2513 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2514
2515 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2516 indirect_offset += 2;
2517
2518 /* look for the matching index */
2519 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2520 if (unique_indirect_regs[idx] ==
2521 register_list_format[indirect_offset] ||
2522 !unique_indirect_regs[idx])
2523 break;
2524 }
2525
2526 BUG_ON(idx >= unique_indirect_reg_count);
2527
2528 if (!unique_indirect_regs[idx])
2529 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2530
2531 indirect_offset++;
2532 }
2533 }
2534 }
2535
2536 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2537 {
2538 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2539 int unique_indirect_reg_count = 0;
2540
2541 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2542 int indirect_start_offsets_count = 0;
2543
2544 int list_size = 0;
2545 int i = 0, j = 0;
2546 u32 tmp = 0;
2547
2548 u32 *register_list_format =
2549 kmemdup(adev->gfx.rlc.register_list_format,
2550 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2551 if (!register_list_format)
2552 return -ENOMEM;
2553
2554 /* setup unique_indirect_regs array and indirect_start_offsets array */
2555 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2556 gfx_v9_1_parse_ind_reg_list(register_list_format,
2557 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2558 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2559 unique_indirect_regs,
2560 unique_indirect_reg_count,
2561 indirect_start_offsets,
2562 &indirect_start_offsets_count,
2563 ARRAY_SIZE(indirect_start_offsets));
2564
2565 /* enable auto inc in case it is disabled */
2566 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2567 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2568 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2569
2570 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2571 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2572 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2573 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2574 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2575 adev->gfx.rlc.register_restore[i]);
2576
2577 /* load indirect register */
2578 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2579 adev->gfx.rlc.reg_list_format_start);
2580
2581 /* direct register portion */
2582 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2583 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2584 register_list_format[i]);
2585
2586 /* indirect register portion */
2587 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2588 if (register_list_format[i] == 0xFFFFFFFF) {
2589 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2590 continue;
2591 }
2592
2593 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2595
2596 for (j = 0; j < unique_indirect_reg_count; j++) {
2597 if (register_list_format[i] == unique_indirect_regs[j]) {
2598 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2599 break;
2600 }
2601 }
2602
2603 BUG_ON(j >= unique_indirect_reg_count);
2604
2605 i++;
2606 }
2607
2608 /* set save/restore list size */
2609 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2610 list_size = list_size >> 1;
2611 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2612 adev->gfx.rlc.reg_restore_list_size);
2613 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2614
2615 /* write the starting offsets to RLC scratch ram */
2616 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2617 adev->gfx.rlc.starting_offsets_start);
2618 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2619 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2620 indirect_start_offsets[i]);
2621
2622 /* load unique indirect regs*/
2623 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2624 if (unique_indirect_regs[i] != 0) {
2625 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2626 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2627 unique_indirect_regs[i] & 0x3FFFF);
2628
2629 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2630 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2631 unique_indirect_regs[i] >> 20);
2632 }
2633 }
2634
2635 kfree(register_list_format);
2636 return 0;
2637 }
2638
2639 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2640 {
2641 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2642 }
2643
2644 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2645 bool enable)
2646 {
2647 uint32_t data = 0;
2648 uint32_t default_data = 0;
2649
2650 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2651 if (enable) {
2652 /* enable GFXIP control over CGPG */
2653 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2654 if(default_data != data)
2655 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2656
2657 /* update status */
2658 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2659 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2660 if(default_data != data)
2661 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2662 } else {
2663 /* restore GFXIP control over GCPG */
2664 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2665 if(default_data != data)
2666 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2667 }
2668 }
2669
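/*
 * One-time GFX powergating setup: program the CP wptr idle poll count,
 * the RLC power up/down/propagate/mem-sleep delays, the SERDES command
 * delay, the CGCG-before-CGPG delay and the GRBM register-save idle
 * threshold, then (on everything except GC 9.3.0) hand CGPG control to
 * GFXIP through the PWR block.
 */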
2670 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2671 {
2672 uint32_t data = 0;
2673
2674 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2675 AMD_PG_SUPPORT_GFX_SMG |
2676 AMD_PG_SUPPORT_GFX_DMG)) {
2677 /* init IDLE_POLL_COUNT = 60 */
2678 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2679 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2680 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2681 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2682
2683 /* init RLC PG Delay */
2684 data = 0;
2685 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2686 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2687 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2688 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2689 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2690
2691 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2692 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2693 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2694 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2695
2696 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2697 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2698 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2699 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2700
2701 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2702 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2703
2704 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2705 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2706 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2707 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2708 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2709 }
2710 }
2711
2712 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2713 bool enable)
2714 {
2715 uint32_t data = 0;
2716 uint32_t default_data = 0;
2717
2718 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2719 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2720 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2721 enable ? 1 : 0);
2722 if (default_data != data)
2723 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2724 }
2725
2726 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2727 bool enable)
2728 {
2729 uint32_t data = 0;
2730 uint32_t default_data = 0;
2731
2732 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2733 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2734 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2735 enable ? 1 : 0);
2736 if(default_data != data)
2737 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2738 }
2739
2740 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2741 bool enable)
2742 {
2743 uint32_t data = 0;
2744 uint32_t default_data = 0;
2745
2746 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2747 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2748 CP_PG_DISABLE,
2749 enable ? 0 : 1);
2750 if(default_data != data)
2751 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2752 }
2753
2754 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2755 bool enable)
2756 {
2757 uint32_t data, default_data;
2758
2759 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2760 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2761 GFX_POWER_GATING_ENABLE,
2762 enable ? 1 : 0);
2763 if(default_data != data)
2764 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2765 }
2766
2767 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2768 bool enable)
2769 {
2770 uint32_t data, default_data;
2771
2772 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2773 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2774 GFX_PIPELINE_PG_ENABLE,
2775 enable ? 1 : 0);
2776 if(default_data != data)
2777 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2778
2779 if (!enable)
2780 /* read any GFX register to wake up GFX */
2781 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2782 }
2783
2784 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2785 bool enable)
2786 {
2787 uint32_t data, default_data;
2788
2789 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2790 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2791 STATIC_PER_CU_PG_ENABLE,
2792 enable ? 1 : 0);
2793 if(default_data != data)
2794 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2795 }
2796
2797 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2798 bool enable)
2799 {
2800 uint32_t data, default_data;
2801
2802 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2803 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2804 DYN_PER_CU_PG_ENABLE,
2805 enable ? 1 : 0);
2806 if (default_data != data)
2807 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2808 }
2809
2810 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2811 {
2812 gfx_v9_0_init_csb(adev);
2813
2814 /*
2815 * The RLC save/restore list is usable since RLC v2_1,
2816 * and it is needed by the gfxoff feature.
2817 */
2818 if (adev->gfx.rlc.is_rlc_v2_1) {
2819 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2820 (adev->apu_flags & AMD_APU_IS_RAVEN2))
2821 gfx_v9_1_init_rlc_save_restore_list(adev);
2822 gfx_v9_0_enable_save_restore_machine(adev);
2823 }
2824
2825 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2826 AMD_PG_SUPPORT_GFX_SMG |
2827 AMD_PG_SUPPORT_GFX_DMG |
2828 AMD_PG_SUPPORT_CP |
2829 AMD_PG_SUPPORT_GDS |
2830 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2831 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2832 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2833 gfx_v9_0_init_gfx_power_gating(adev);
2834 }
2835 }
2836
2837 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2838 {
2839 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2840 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2841 gfx_v9_0_wait_for_rlc_serdes(adev);
2842 }
2843
2844 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2845 {
2846 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2847 udelay(50);
2848 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2849 udelay(50);
2850 }
2851
2852 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2853 {
2854 #ifdef AMDGPU_RLC_DEBUG_RETRY
2855 u32 rlc_ucode_ver;
2856 #endif
2857
2858 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2859 udelay(50);
2860
2861 /* on APUs (e.g. carrizo), the cp interrupt is only enabled after cp init */
2862 if (!(adev->flags & AMD_IS_APU)) {
2863 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2864 udelay(50);
2865 }
2866
2867 #ifdef AMDGPU_RLC_DEBUG_RETRY
2868 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2869 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2870 if (rlc_ucode_ver == 0x108) {
2871 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2872 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2873 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2874 * default is 0x9C4 to create a 100us interval */
2875 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
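/* Worked example: 0x9C4 is 2500 decimal, which matches the 100us interval
 * above assuming the RefCLK runs at 25 MHz (2500 cycles / 25 MHz = 100us).
 * The RefCLK rate is an assumption here, not something defined in this file.
 */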
2876 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2877 * to disable the page fault retry interrupts, default is
2878 * 0x100 (256) */
2879 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2880 }
2881 #endif
2882 }
2883
2884 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2885 {
2886 const struct rlc_firmware_header_v2_0 *hdr;
2887 const __le32 *fw_data;
2888 unsigned i, fw_size;
2889
2890 if (!adev->gfx.rlc_fw)
2891 return -EINVAL;
2892
2893 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2894 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2895
2896 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2897 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2898 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
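/* ucode_size_bytes is a byte count, but the RLC_GPM_UCODE_DATA port below is
 * written one 32-bit dword at a time, hence the division by 4.
 */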
2899
2900 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2901 RLCG_UCODE_LOADING_START_ADDRESS);
2902 for (i = 0; i < fw_size; i++)
2903 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2904 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2905
2906 return 0;
2907 }
2908
2909 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2910 {
2911 int r;
2912
2913 if (amdgpu_sriov_vf(adev)) {
2914 gfx_v9_0_init_csb(adev);
2915 return 0;
2916 }
2917
2918 adev->gfx.rlc.funcs->stop(adev);
2919
2920 /* disable CG */
2921 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2922
2923 gfx_v9_0_init_pg(adev);
2924
2925 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2926 /* legacy rlc firmware loading */
2927 r = gfx_v9_0_rlc_load_microcode(adev);
2928 if (r)
2929 return r;
2930 }
2931
2932 switch (adev->ip_versions[GC_HWIP][0]) {
2933 case IP_VERSION(9, 2, 2):
2934 case IP_VERSION(9, 1, 0):
2935 gfx_v9_0_init_lbpw(adev);
2936 if (amdgpu_lbpw == 0)
2937 gfx_v9_0_enable_lbpw(adev, false);
2938 else
2939 gfx_v9_0_enable_lbpw(adev, true);
2940 break;
2941 case IP_VERSION(9, 4, 0):
2942 gfx_v9_4_init_lbpw(adev);
2943 if (amdgpu_lbpw > 0)
2944 gfx_v9_0_enable_lbpw(adev, true);
2945 else
2946 gfx_v9_0_enable_lbpw(adev, false);
2947 break;
2948 default:
2949 break;
2950 }
2951
2952 gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
2953
2954 adev->gfx.rlc.funcs->start(adev);
2955
2956 return 0;
2957 }
2958
2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2960 {
2961 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2962
2963 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2964 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2965 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2966 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
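/* enable == true clears the ME/PFP/CE halt bits so the gfx front end runs;
 * enable == false sets all three halt bits to stop it.
 */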
2967 udelay(50);
2968 }
2969
2970 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2971 {
2972 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2973 const struct gfx_firmware_header_v1_0 *ce_hdr;
2974 const struct gfx_firmware_header_v1_0 *me_hdr;
2975 const __le32 *fw_data;
2976 unsigned i, fw_size;
2977
2978 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2979 return -EINVAL;
2980
2981 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2982 adev->gfx.pfp_fw->data;
2983 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2984 adev->gfx.ce_fw->data;
2985 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2986 adev->gfx.me_fw->data;
2987
2988 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2989 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2990 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2991
2992 gfx_v9_0_cp_gfx_enable(adev, false);
2993
2994 /* PFP */
2995 fw_data = (const __le32 *)
2996 (adev->gfx.pfp_fw->data +
2997 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2998 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2999 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3000 for (i = 0; i < fw_size; i++)
3001 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3002 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3003
3004 /* CE */
3005 fw_data = (const __le32 *)
3006 (adev->gfx.ce_fw->data +
3007 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3008 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3009 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3010 for (i = 0; i < fw_size; i++)
3011 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3012 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3013
3014 /* ME */
3015 fw_data = (const __le32 *)
3016 (adev->gfx.me_fw->data +
3017 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3018 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3019 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3020 for (i = 0; i < fw_size; i++)
3021 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3022 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3023
3024 return 0;
3025 }
3026
3027 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3028 {
3029 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3030 const struct cs_section_def *sect = NULL;
3031 const struct cs_extent_def *ext = NULL;
3032 int r, i, tmp;
3033
3034 /* init the CP */
3035 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3036 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3037
3038 gfx_v9_0_cp_gfx_enable(adev, true);
3039
3040 /* For now, limit this quirk to the gfx9 APU series; it has already been
3041 * confirmed that gfx10/gfx11 APUs do not need this update.
3042 */
3043 if (adev->flags & AMD_IS_APU &&
3044 adev->in_s3 && !adev->suspend_complete) {
3045 DRM_INFO(" Will skip the CSB packet resubmit\n");
3046 return 0;
3047 }
3048 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3049 if (r) {
3050 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3051 return r;
3052 }
3053
3054 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3055 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3056
3057 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3058 amdgpu_ring_write(ring, 0x80000000);
3059 amdgpu_ring_write(ring, 0x80000000);
3060
3061 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3062 for (ext = sect->section; ext->extent != NULL; ++ext) {
3063 if (sect->id == SECT_CONTEXT) {
3064 amdgpu_ring_write(ring,
3065 PACKET3(PACKET3_SET_CONTEXT_REG,
3066 ext->reg_count));
3067 amdgpu_ring_write(ring,
3068 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3069 for (i = 0; i < ext->reg_count; i++)
3070 amdgpu_ring_write(ring, ext->extent[i]);
3071 }
3072 }
3073 }
3074
3075 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3076 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3077
3078 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3079 amdgpu_ring_write(ring, 0);
3080
3081 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3082 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3083 amdgpu_ring_write(ring, 0x8000);
3084 amdgpu_ring_write(ring, 0x8000);
3085
3086 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3087 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3088 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3089 amdgpu_ring_write(ring, tmp);
3090 amdgpu_ring_write(ring, 0);
3091
3092 amdgpu_ring_commit(ring);
3093
3094 return 0;
3095 }
3096
3097 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3098 {
3099 struct amdgpu_ring *ring;
3100 u32 tmp;
3101 u32 rb_bufsz;
3102 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3103
3104 /* Set the write pointer delay */
3105 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3106
3107 /* set the RB to use vmid 0 */
3108 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3109
3110 /* Set ring buffer size */
3111 ring = &adev->gfx.gfx_ring[0];
3112 rb_bufsz = order_base_2(ring->ring_size / 8);
3113 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3114 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
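/* Worked example (assuming an illustrative 64 KiB ring): ring_size / 8 = 8192,
 * order_base_2(8192) = 13, so RB_BUFSZ = 13 and RB_BLKSZ = 11.
 */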
3115 #ifdef __BIG_ENDIAN
3116 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3117 #endif
3118 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3119
3120 /* Initialize the ring buffer's write pointers */
3121 ring->wptr = 0;
3122 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3123 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3124
3125 /* set the wb address whether it's enabled or not */
3126 rptr_addr = ring->rptr_gpu_addr;
3127 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3128 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3129
3130 wptr_gpu_addr = ring->wptr_gpu_addr;
3131 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3132 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3133
3134 mdelay(1);
3135 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3136
3137 rb_addr = ring->gpu_addr >> 8;
3138 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3139 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3140
3141 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3142 if (ring->use_doorbell) {
3143 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3144 DOORBELL_OFFSET, ring->doorbell_index);
3145 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3146 DOORBELL_EN, 1);
3147 } else {
3148 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3149 }
3150 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3151
3152 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3153 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3154 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3155
3156 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3157 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3158
3159
3160 /* start the ring */
3161 gfx_v9_0_cp_gfx_start(adev);
3162
3163 return 0;
3164 }
3165
3166 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3167 {
3168 if (enable) {
3169 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3170 } else {
3171 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3172 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3173 adev->gfx.kiq[0].ring.sched.ready = false;
3174 }
3175 udelay(50);
3176 }
3177
3178 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3179 {
3180 const struct gfx_firmware_header_v1_0 *mec_hdr;
3181 const __le32 *fw_data;
3182 unsigned i;
3183 u32 tmp;
3184
3185 if (!adev->gfx.mec_fw)
3186 return -EINVAL;
3187
3188 gfx_v9_0_cp_compute_enable(adev, false);
3189
3190 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3191 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3192
3193 fw_data = (const __le32 *)
3194 (adev->gfx.mec_fw->data +
3195 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3196 tmp = 0;
3197 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3198 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3199 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3200
3201 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3202 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3203 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3204 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3205
3206 /* MEC1 */
3207 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3208 mec_hdr->jt_offset);
3209 for (i = 0; i < mec_hdr->jt_size; i++)
3210 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3211 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3212
3213 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3214 adev->gfx.mec_fw_version);
3215 /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3216
3217 return 0;
3218 }
3219
3220 /* KIQ functions */
3221 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3222 {
3223 uint32_t tmp;
3224 struct amdgpu_device *adev = ring->adev;
3225
3226 /* tell the RLC which queue is the KIQ queue */
3227 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3228 tmp &= 0xffffff00;
3229 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3230 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3231 tmp |= 0x80;
3232 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
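/* The low byte encodes the KIQ location as (me << 5) | (pipe << 3) | queue;
 * the second write then sets bit 7 (0x80), which appears to act as the
 * valid/enable bit, so the queue selection lands before it is activated.
 */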
3233 }
3234
3235 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3236 {
3237 struct amdgpu_device *adev = ring->adev;
3238
3239 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3240 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3241 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3242 mqd->cp_hqd_queue_priority =
3243 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3244 }
3245 }
3246 }
3247
3248 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3249 {
3250 struct amdgpu_device *adev = ring->adev;
3251 struct v9_mqd *mqd = ring->mqd_ptr;
3252 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3253 uint32_t tmp;
3254
3255 mqd->header = 0xC0310800;
3256 mqd->compute_pipelinestat_enable = 0x00000001;
3257 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3258 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3259 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3260 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3261 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3262 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3263 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3264 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3265 mqd->compute_misc_reserved = 0x00000003;
3266
3267 mqd->dynamic_cu_mask_addr_lo =
3268 lower_32_bits(ring->mqd_gpu_addr
3269 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3270 mqd->dynamic_cu_mask_addr_hi =
3271 upper_32_bits(ring->mqd_gpu_addr
3272 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3273
3274 eop_base_addr = ring->eop_gpu_addr >> 8;
3275 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3276 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3277
3278 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3279 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3280 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3281 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
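/* Worked example: GFX9_MEC_HPD_SIZE is 4096 bytes = 1024 dwords, so
 * order_base_2(1024) - 1 = 9 and the hardware decodes 2^(9+1) = 1024 dwords,
 * matching the 4 KiB EOP buffer.
 */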
3282
3283 mqd->cp_hqd_eop_control = tmp;
3284
3285 /* enable doorbell? */
3286 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3287
3288 if (ring->use_doorbell) {
3289 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3290 DOORBELL_OFFSET, ring->doorbell_index);
3291 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3292 DOORBELL_EN, 1);
3293 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3294 DOORBELL_SOURCE, 0);
3295 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3296 DOORBELL_HIT, 0);
3297 } else {
3298 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3299 DOORBELL_EN, 0);
3300 }
3301
3302 mqd->cp_hqd_pq_doorbell_control = tmp;
3303
3304 /* disable the queue if it's active */
3305 ring->wptr = 0;
3306 mqd->cp_hqd_dequeue_request = 0;
3307 mqd->cp_hqd_pq_rptr = 0;
3308 mqd->cp_hqd_pq_wptr_lo = 0;
3309 mqd->cp_hqd_pq_wptr_hi = 0;
3310
3311 /* set the pointer to the MQD */
3312 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3313 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3314
3315 /* set MQD vmid to 0 */
3316 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3317 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3318 mqd->cp_mqd_control = tmp;
3319
3320 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3321 hqd_gpu_addr = ring->gpu_addr >> 8;
3322 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3323 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3324
3325 /* set up the HQD, this is similar to CP_RB0_CNTL */
3326 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3327 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3328 (order_base_2(ring->ring_size / 4) - 1));
3329 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3330 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
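/* QUEUE_SIZE is log2(ring size in dwords) - 1; e.g. an illustrative 64 KiB
 * ring gives order_base_2(16384) - 1 = 13.  RPTR_BLOCK_SIZE likewise derives
 * from the GPU page size (assuming 4 KiB: order_base_2(1024) - 1 = 9).
 */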
3331 #ifdef __BIG_ENDIAN
3332 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3333 #endif
3334 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3335 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3336 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3337 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3338 mqd->cp_hqd_pq_control = tmp;
3339
3340 /* set the wb address whether it's enabled or not */
3341 wb_gpu_addr = ring->rptr_gpu_addr;
3342 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3343 mqd->cp_hqd_pq_rptr_report_addr_hi =
3344 upper_32_bits(wb_gpu_addr) & 0xffff;
3345
3346 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3347 wb_gpu_addr = ring->wptr_gpu_addr;
3348 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3349 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3350
3351 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3352 ring->wptr = 0;
3353 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3354
3355 /* set the vmid for the queue */
3356 mqd->cp_hqd_vmid = 0;
3357
3358 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3359 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3360 mqd->cp_hqd_persistent_state = tmp;
3361
3362 /* set MIN_IB_AVAIL_SIZE */
3363 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3364 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3365 mqd->cp_hqd_ib_control = tmp;
3366
3367 /* set static priority for a queue/ring */
3368 gfx_v9_0_mqd_set_priority(ring, mqd);
3369 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3370
3371 /* the map_queues packet does not need to activate the queue,
3372 * so only the KIQ needs this field set.
3373 */
3374 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3375 mqd->cp_hqd_active = 1;
3376
3377 return 0;
3378 }
3379
3380 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3381 {
3382 struct amdgpu_device *adev = ring->adev;
3383 struct v9_mqd *mqd = ring->mqd_ptr;
3384 int j;
3385
3386 /* disable wptr polling */
3387 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3388
3389 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3390 mqd->cp_hqd_eop_base_addr_lo);
3391 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3392 mqd->cp_hqd_eop_base_addr_hi);
3393
3394 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3395 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3396 mqd->cp_hqd_eop_control);
3397
3398 /* enable doorbell? */
3399 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3400 mqd->cp_hqd_pq_doorbell_control);
3401
3402 /* disable the queue if it's active */
3403 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3404 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3405 for (j = 0; j < adev->usec_timeout; j++) {
3406 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3407 break;
3408 udelay(1);
3409 }
3410 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3411 mqd->cp_hqd_dequeue_request);
3412 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3413 mqd->cp_hqd_pq_rptr);
3414 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3415 mqd->cp_hqd_pq_wptr_lo);
3416 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3417 mqd->cp_hqd_pq_wptr_hi);
3418 }
3419
3420 /* set the pointer to the MQD */
3421 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3422 mqd->cp_mqd_base_addr_lo);
3423 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3424 mqd->cp_mqd_base_addr_hi);
3425
3426 /* set MQD vmid to 0 */
3427 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3428 mqd->cp_mqd_control);
3429
3430 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3431 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3432 mqd->cp_hqd_pq_base_lo);
3433 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3434 mqd->cp_hqd_pq_base_hi);
3435
3436 /* set up the HQD, this is similar to CP_RB0_CNTL */
3437 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3438 mqd->cp_hqd_pq_control);
3439
3440 /* set the wb address whether it's enabled or not */
3441 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3442 mqd->cp_hqd_pq_rptr_report_addr_lo);
3443 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3444 mqd->cp_hqd_pq_rptr_report_addr_hi);
3445
3446 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3447 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3448 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3449 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3450 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3451
3452 /* enable the doorbell if requested */
3453 if (ring->use_doorbell) {
3454 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3455 (adev->doorbell_index.kiq * 2) << 2);
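/* doorbell_index values appear to count 64-bit doorbell slots, so "* 2"
 * converts to a 32-bit dword index and "<< 2" converts that to the byte
 * offset the register expects (this reading of the units is an assumption).
 */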
3456 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3457 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3458 * around this issue. This change has to stay aligned with the firmware
3459 * update.
3460 */
3461 if (check_if_enlarge_doorbell_range(adev))
3462 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3463 (adev->doorbell.size - 4));
3464 else
3465 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3466 (adev->doorbell_index.userqueue_end * 2) << 2);
3467 }
3468
3469 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3470 mqd->cp_hqd_pq_doorbell_control);
3471
3472 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3473 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3474 mqd->cp_hqd_pq_wptr_lo);
3475 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3476 mqd->cp_hqd_pq_wptr_hi);
3477
3478 /* set the vmid for the queue */
3479 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3480
3481 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3482 mqd->cp_hqd_persistent_state);
3483
3484 /* activate the queue */
3485 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3486 mqd->cp_hqd_active);
3487
3488 if (ring->use_doorbell)
3489 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3490
3491 return 0;
3492 }
3493
3494 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3495 {
3496 struct amdgpu_device *adev = ring->adev;
3497 int j;
3498
3499 /* disable the queue if it's active */
3500 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3501
3502 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3503
3504 for (j = 0; j < adev->usec_timeout; j++) {
3505 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3506 break;
3507 udelay(1);
3508 }
3509
3510 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3511 DRM_DEBUG("KIQ dequeue request failed.\n");
3512
3513 /* Manual disable if dequeue request times out */
3514 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3515 }
3516
3517 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3518 0);
3519 }
3520
3521 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3522 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3523 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3524 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3525 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3526 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3527 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3528 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3529
3530 return 0;
3531 }
3532
3533 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3534 {
3535 struct amdgpu_device *adev = ring->adev;
3536 struct v9_mqd *mqd = ring->mqd_ptr;
3537 struct v9_mqd *tmp_mqd;
3538
3539 gfx_v9_0_kiq_setting(ring);
3540
3541 /* The GPU could be in a bad state during probe; the driver triggers a reset
3542 * after loading the SMU, and in that case the MQD has not been initialized.
3543 * The driver needs to re-init the MQD.
3544 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3545 */
3546 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3547 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3548 /* for GPU_RESET case , reset MQD to a clean status */
3549 if (adev->gfx.kiq[0].mqd_backup)
3550 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3551
3552 /* reset ring buffer */
3553 ring->wptr = 0;
3554 amdgpu_ring_clear_ring(ring);
3555
3556 mutex_lock(&adev->srbm_mutex);
3557 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3558 gfx_v9_0_kiq_init_register(ring);
3559 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3560 mutex_unlock(&adev->srbm_mutex);
3561 } else {
3562 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3563 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3564 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3565 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3566 amdgpu_ring_clear_ring(ring);
3567 mutex_lock(&adev->srbm_mutex);
3568 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3569 gfx_v9_0_mqd_init(ring);
3570 gfx_v9_0_kiq_init_register(ring);
3571 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3572 mutex_unlock(&adev->srbm_mutex);
3573
3574 if (adev->gfx.kiq[0].mqd_backup)
3575 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3576 }
3577
3578 return 0;
3579 }
3580
3581 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3582 {
3583 struct amdgpu_device *adev = ring->adev;
3584 struct v9_mqd *mqd = ring->mqd_ptr;
3585 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3586 struct v9_mqd *tmp_mqd;
3587
3588 /* Same as the KIQ init above: the driver needs to re-init the MQD if
3589 * mqd->cp_hqd_pq_control has not been initialized before.
3590 */
3591 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3592
3593 if (!tmp_mqd->cp_hqd_pq_control ||
3594 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3595 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3596 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3597 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3598 mutex_lock(&adev->srbm_mutex);
3599 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3600 gfx_v9_0_mqd_init(ring);
3601 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3602 mutex_unlock(&adev->srbm_mutex);
3603
3604 if (adev->gfx.mec.mqd_backup[mqd_idx])
3605 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3606 } else {
3607 /* restore MQD to a clean status */
3608 if (adev->gfx.mec.mqd_backup[mqd_idx])
3609 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3610 /* reset ring buffer */
3611 ring->wptr = 0;
3612 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3613 amdgpu_ring_clear_ring(ring);
3614 }
3615
3616 return 0;
3617 }
3618
3619 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3620 {
3621 struct amdgpu_ring *ring;
3622 int r;
3623
3624 ring = &adev->gfx.kiq[0].ring;
3625
3626 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3627 if (unlikely(r != 0))
3628 return r;
3629
3630 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3631 if (unlikely(r != 0)) {
3632 amdgpu_bo_unreserve(ring->mqd_obj);
3633 return r;
3634 }
3635
3636 gfx_v9_0_kiq_init_queue(ring);
3637 amdgpu_bo_kunmap(ring->mqd_obj);
3638 ring->mqd_ptr = NULL;
3639 amdgpu_bo_unreserve(ring->mqd_obj);
3640 return 0;
3641 }
3642
3643 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3644 {
3645 struct amdgpu_ring *ring = NULL;
3646 int r = 0, i;
3647
3648 gfx_v9_0_cp_compute_enable(adev, true);
3649
3650 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3651 ring = &adev->gfx.compute_ring[i];
3652
3653 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3654 if (unlikely(r != 0))
3655 goto done;
3656 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3657 if (!r) {
3658 r = gfx_v9_0_kcq_init_queue(ring);
3659 amdgpu_bo_kunmap(ring->mqd_obj);
3660 ring->mqd_ptr = NULL;
3661 }
3662 amdgpu_bo_unreserve(ring->mqd_obj);
3663 if (r)
3664 goto done;
3665 }
3666
3667 r = amdgpu_gfx_enable_kcq(adev, 0);
3668 done:
3669 return r;
3670 }
3671
3672 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3673 {
3674 int r, i;
3675 struct amdgpu_ring *ring;
3676
3677 if (!(adev->flags & AMD_IS_APU))
3678 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3679
3680 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3681 if (adev->gfx.num_gfx_rings) {
3682 /* legacy firmware loading */
3683 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3684 if (r)
3685 return r;
3686 }
3687
3688 r = gfx_v9_0_cp_compute_load_microcode(adev);
3689 if (r)
3690 return r;
3691 }
3692
3693 r = gfx_v9_0_kiq_resume(adev);
3694 if (r)
3695 return r;
3696
3697 if (adev->gfx.num_gfx_rings) {
3698 r = gfx_v9_0_cp_gfx_resume(adev);
3699 if (r)
3700 return r;
3701 }
3702
3703 r = gfx_v9_0_kcq_resume(adev);
3704 if (r)
3705 return r;
3706
3707 if (adev->gfx.num_gfx_rings) {
3708 ring = &adev->gfx.gfx_ring[0];
3709 r = amdgpu_ring_test_helper(ring);
3710 if (r)
3711 return r;
3712 }
3713
3714 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3715 ring = &adev->gfx.compute_ring[i];
3716 amdgpu_ring_test_helper(ring);
3717 }
3718
3719 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3720
3721 return 0;
3722 }
3723
3724 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3725 {
3726 u32 tmp;
3727
3728 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3729 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3730 return;
3731
3732 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3733 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3734 adev->df.hash_status.hash_64k);
3735 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3736 adev->df.hash_status.hash_2m);
3737 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3738 adev->df.hash_status.hash_1g);
3739 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3740 }
3741
3742 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3743 {
3744 if (adev->gfx.num_gfx_rings)
3745 gfx_v9_0_cp_gfx_enable(adev, enable);
3746 gfx_v9_0_cp_compute_enable(adev, enable);
3747 }
3748
3749 static int gfx_v9_0_hw_init(void *handle)
3750 {
3751 int r;
3752 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3753
3754 if (!amdgpu_sriov_vf(adev))
3755 gfx_v9_0_init_golden_registers(adev);
3756
3757 gfx_v9_0_constants_init(adev);
3758
3759 gfx_v9_0_init_tcp_config(adev);
3760
3761 r = adev->gfx.rlc.funcs->resume(adev);
3762 if (r)
3763 return r;
3764
3765 r = gfx_v9_0_cp_resume(adev);
3766 if (r)
3767 return r;
3768
3769 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3770 gfx_v9_4_2_set_power_brake_sequence(adev);
3771
3772 return r;
3773 }
3774
3775 static int gfx_v9_0_hw_fini(void *handle)
3776 {
3777 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3778
3779 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3780 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3781 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3782 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3783
3784 /* DF freeze and KCQ disable would fail if a RAS interrupt has triggered */
3785 if (!amdgpu_ras_intr_triggered())
3786 /* disable the KCQs so the CPC does not touch memory that is no longer valid */
3787 amdgpu_gfx_disable_kcq(adev, 0);
3788
3789 if (amdgpu_sriov_vf(adev)) {
3790 gfx_v9_0_cp_gfx_enable(adev, false);
3791 /* must disable wptr polling for SRIOV once hw_fini is done, otherwise
3792 * the CPC engine may keep fetching a WB address that is already
3793 * invalid after sw_fini and trigger a DMAR read error on the
3794 * hypervisor side.
3795 */
3796 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3797 return 0;
3798 }
3799
3800 /* Use the deinitialize sequence from CAIL when unbinding the device from the
3801 * driver, otherwise the KIQ hangs when the device is bound back.
3802 */
3803 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3804 mutex_lock(&adev->srbm_mutex);
3805 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3806 adev->gfx.kiq[0].ring.pipe,
3807 adev->gfx.kiq[0].ring.queue, 0, 0);
3808 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3809 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3810 mutex_unlock(&adev->srbm_mutex);
3811 }
3812
3813 gfx_v9_0_cp_enable(adev, false);
3814
3815 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3816 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3817 (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3818 dev_dbg(adev->dev, "Skipping RLC halt\n");
3819 return 0;
3820 }
3821
3822 adev->gfx.rlc.funcs->stop(adev);
3823 return 0;
3824 }
3825
3826 static int gfx_v9_0_suspend(void *handle)
3827 {
3828 return gfx_v9_0_hw_fini(handle);
3829 }
3830
3831 static int gfx_v9_0_resume(void *handle)
3832 {
3833 return gfx_v9_0_hw_init(handle);
3834 }
3835
3836 static bool gfx_v9_0_is_idle(void *handle)
3837 {
3838 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3839
3840 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3841 GRBM_STATUS, GUI_ACTIVE))
3842 return false;
3843 else
3844 return true;
3845 }
3846
3847 static int gfx_v9_0_wait_for_idle(void *handle)
3848 {
3849 unsigned i;
3850 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3851
3852 for (i = 0; i < adev->usec_timeout; i++) {
3853 if (gfx_v9_0_is_idle(handle))
3854 return 0;
3855 udelay(1);
3856 }
3857 return -ETIMEDOUT;
3858 }
3859
3860 static int gfx_v9_0_soft_reset(void *handle)
3861 {
3862 u32 grbm_soft_reset = 0;
3863 u32 tmp;
3864 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3865
3866 /* GRBM_STATUS */
3867 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3868 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3869 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3870 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3871 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3872 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3873 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3874 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3875 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3876 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3877 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3878 }
3879
3880 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3881 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3882 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3883 }
3884
3885 /* GRBM_STATUS2 */
3886 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3887 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3888 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3889 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3890
3891
3892 if (grbm_soft_reset) {
3893 /* stop the rlc */
3894 adev->gfx.rlc.funcs->stop(adev);
3895
3896 if (adev->gfx.num_gfx_rings)
3897 /* Disable GFX parsing/prefetching */
3898 gfx_v9_0_cp_gfx_enable(adev, false);
3899
3900 /* Disable MEC parsing/prefetching */
3901 gfx_v9_0_cp_compute_enable(adev, false);
3902
3903 if (grbm_soft_reset) {
3904 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3905 tmp |= grbm_soft_reset;
3906 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3907 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3908 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3909
3910 udelay(50);
3911
3912 tmp &= ~grbm_soft_reset;
3913 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3914 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3915 }
3916
3917 /* Wait a little for things to settle down */
3918 udelay(50);
3919 }
3920 return 0;
3921 }
3922
3923 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3924 {
3925 signed long r, cnt = 0;
3926 unsigned long flags;
3927 uint32_t seq, reg_val_offs = 0;
3928 uint64_t value = 0;
3929 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
3930 struct amdgpu_ring *ring = &kiq->ring;
3931
3932 BUG_ON(!ring->funcs->emit_rreg);
3933
3934 spin_lock_irqsave(&kiq->ring_lock, flags);
3935 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3936 pr_err("critical bug! too many kiq readers\n");
3937 goto failed_unlock;
3938 }
3939 amdgpu_ring_alloc(ring, 32);
3940 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3941 amdgpu_ring_write(ring, 9 | /* src: register*/
3942 (5 << 8) | /* dst: memory */
3943 (1 << 16) | /* count sel */
3944 (1 << 20)); /* write confirm */
3945 amdgpu_ring_write(ring, 0);
3946 amdgpu_ring_write(ring, 0);
3947 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3948 reg_val_offs * 4));
3949 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3950 reg_val_offs * 4));
3951 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3952 if (r)
3953 goto failed_undo;
3954
3955 amdgpu_ring_commit(ring);
3956 spin_unlock_irqrestore(&kiq->ring_lock, flags);
3957
3958 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3959
3960 /* don't wait any longer in the gpu reset case, because waiting here may
3961 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
3962 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
3963 * never return if we keep waiting in virt_kiq_rreg, which causes
3964 * gpu_recover() to hang there.
3965 *
3966 * also don't wait any longer in IRQ context
3967 */
3968 if (r < 1 && (amdgpu_in_reset(adev)))
3969 goto failed_kiq_read;
3970
3971 might_sleep();
3972 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3973 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3974 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3975 }
3976
3977 if (cnt > MAX_KIQ_REG_TRY)
3978 goto failed_kiq_read;
3979
3980 mb();
3981 value = (uint64_t)adev->wb.wb[reg_val_offs] |
3982 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
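/* the two consecutive write-back dwords hold the low and high halves of the
 * 64-bit value copied by the COPY_DATA packet above.
 */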
3983 amdgpu_device_wb_free(adev, reg_val_offs);
3984 return value;
3985
3986 failed_undo:
3987 amdgpu_ring_undo(ring);
3988 failed_unlock:
3989 spin_unlock_irqrestore(&kiq->ring_lock, flags);
3990 failed_kiq_read:
3991 if (reg_val_offs)
3992 amdgpu_device_wb_free(adev, reg_val_offs);
3993 pr_err("failed to read gpu clock\n");
3994 return ~0;
3995 }
3996
3997 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3998 {
3999 uint64_t clock, clock_lo, clock_hi, hi_check;
4000
4001 switch (adev->ip_versions[GC_HWIP][0]) {
4002 case IP_VERSION(9, 3, 0):
4003 preempt_disable();
4004 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4005 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4006 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4007 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4008 * roughly every 42 seconds.
4009 */
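/* At 100 MHz the 32-bit low word wraps every 2^32 / 100e6 ~= 42.9 seconds,
 * hence the re-read of the high word to detect a carry between the two reads.
 */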
4010 if (hi_check != clock_hi) {
4011 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4012 clock_hi = hi_check;
4013 }
4014 preempt_enable();
4015 clock = clock_lo | (clock_hi << 32ULL);
4016 break;
4017 default:
4018 amdgpu_gfx_off_ctrl(adev, false);
4019 mutex_lock(&adev->gfx.gpu_clock_mutex);
4020 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4021 clock = gfx_v9_0_kiq_read_clock(adev);
4022 } else {
4023 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4024 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4025 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4026 }
4027 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4028 amdgpu_gfx_off_ctrl(adev, true);
4029 break;
4030 }
4031 return clock;
4032 }
4033
4034 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4035 uint32_t vmid,
4036 uint32_t gds_base, uint32_t gds_size,
4037 uint32_t gws_base, uint32_t gws_size,
4038 uint32_t oa_base, uint32_t oa_size)
4039 {
4040 struct amdgpu_device *adev = ring->adev;
4041
4042 /* GDS Base */
4043 gfx_v9_0_write_data_to_reg(ring, 0, false,
4044 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4045 gds_base);
4046
4047 /* GDS Size */
4048 gfx_v9_0_write_data_to_reg(ring, 0, false,
4049 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4050 gds_size);
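/* the per-VMID GDS BASE/SIZE registers are interleaved in pairs, hence the
 * "2 * vmid" offset from the VMID0 registers; the GWS and OA registers below
 * are one register per VMID.
 */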
4051
4052 /* GWS */
4053 gfx_v9_0_write_data_to_reg(ring, 0, false,
4054 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4055 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4056
4057 /* OA */
4058 gfx_v9_0_write_data_to_reg(ring, 0, false,
4059 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4060 (1 << (oa_size + oa_base)) - (1 << oa_base));
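/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous mask of
 * oa_size bits starting at bit oa_base, e.g. oa_base = 4, oa_size = 3
 * gives 0x70.
 */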
4061 }
4062
4063 static const u32 vgpr_init_compute_shader[] =
4064 {
4065 0xb07c0000, 0xbe8000ff,
4066 0x000000f8, 0xbf110800,
4067 0x7e000280, 0x7e020280,
4068 0x7e040280, 0x7e060280,
4069 0x7e080280, 0x7e0a0280,
4070 0x7e0c0280, 0x7e0e0280,
4071 0x80808800, 0xbe803200,
4072 0xbf84fff5, 0xbf9c0000,
4073 0xd28c0001, 0x0001007f,
4074 0xd28d0001, 0x0002027e,
4075 0x10020288, 0xb8810904,
4076 0xb7814000, 0xd1196a01,
4077 0x00000301, 0xbe800087,
4078 0xbefc00c1, 0xd89c4000,
4079 0x00020201, 0xd89cc080,
4080 0x00040401, 0x320202ff,
4081 0x00000800, 0x80808100,
4082 0xbf84fff8, 0x7e020280,
4083 0xbf810000, 0x00000000,
4084 };
4085
4086 static const u32 sgpr_init_compute_shader[] =
4087 {
4088 0xb07c0000, 0xbe8000ff,
4089 0x0000005f, 0xbee50080,
4090 0xbe812c65, 0xbe822c65,
4091 0xbe832c65, 0xbe842c65,
4092 0xbe852c65, 0xb77c0005,
4093 0x80808500, 0xbf84fff8,
4094 0xbe800080, 0xbf810000,
4095 };
4096
4097 static const u32 vgpr_init_compute_shader_arcturus[] = {
4098 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4099 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4100 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4101 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4102 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4103 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4104 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4105 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4106 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4107 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4108 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4109 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4110 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4111 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4112 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4113 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4114 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4115 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4116 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4117 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4118 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4119 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4120 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4121 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4122 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4123 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4124 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4125 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4126 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4127 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4128 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4129 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4130 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4131 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4132 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4133 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4134 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4135 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4136 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4137 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4138 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4139 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4140 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4141 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4142 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4143 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4144 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4145 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4146 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4147 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4148 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4149 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4150 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4151 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4152 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4153 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4154 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4155 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4156 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4157 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4158 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4159 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4160 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4161 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4162 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4163 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4164 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4165 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4166 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4167 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4168 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4169 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4170 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4171 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4172 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4173 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4174 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4175 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4176 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4177 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4178 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4179 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4180 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4181 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4182 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4183 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4184 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4185 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4186 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4187 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4188 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4189 0xbf84fff8, 0xbf810000,
4190 };
4191
4192 /* When the register arrays below are changed, please update gpr_reg_size
4193 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4194 * to cover all gfx9 ASICs. */
4195 static const struct soc15_reg_entry vgpr_init_regs[] = {
4196 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4197 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4198 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4199 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4200 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4201 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4202 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4203 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4204 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4205 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4206 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4207 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4208 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4209 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4210 };
4211
4212 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4213 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4214 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4215 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4216 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4217 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4218 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4219 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4220 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4221 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4222 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4223 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4224 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4225 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4226 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4227 };
4228
4229 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4230 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4231 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4232 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4233 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4234 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4235 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4236 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4237 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4238 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4239 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4240 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4241 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4242 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4243 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4244 };
4245
4246 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4247 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4248 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4249 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4250 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4251 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4252 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4253 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4254 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4255 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4256 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4257 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4258 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4259 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4260 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4261 };
4262
4263 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4264 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4265 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4266 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4267 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4268 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4269 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4270 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4271 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4272 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4273 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4274 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4275 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4276 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4277 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4278 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4279 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4280 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4281 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4282 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4283 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4284 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4285 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4286 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4287 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4288 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4289 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4290 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4291 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4292 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4293 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4294 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4295 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4296 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4297 };
4298
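/*
 * GDS half of the EDC workarounds: map the whole GDS into the VMID0
 * aperture and clear it with a CP DMA_DATA packet, then busy-wait for the
 * compute ring to drain before shrinking the aperture back to zero.
 * Skipped unless GFX RAS is supported.
 */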
4299 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4300 {
4301 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4302 int i, r;
4303
4304 /* only support when RAS is enabled */
4305 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4306 return 0;
4307
4308 r = amdgpu_ring_alloc(ring, 7);
4309 if (r) {
4310 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4311 ring->name, r);
4312 return r;
4313 }
4314
4315 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4316 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4317
4318 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4319 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4320 PACKET3_DMA_DATA_DST_SEL(1) |
4321 PACKET3_DMA_DATA_SRC_SEL(2) |
4322 PACKET3_DMA_DATA_ENGINE(0)));
4323 amdgpu_ring_write(ring, 0);
4324 amdgpu_ring_write(ring, 0);
4325 amdgpu_ring_write(ring, 0);
4326 amdgpu_ring_write(ring, 0);
4327 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4328 adev->gds.gds_size);
4329
4330 amdgpu_ring_commit(ring);
4331
4332 for (i = 0; i < adev->usec_timeout; i++) {
4333 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4334 break;
4335 udelay(1);
4336 }
4337
4338 if (i >= adev->usec_timeout)
4339 r = -ETIMEDOUT;
4340
4341 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4342
4343 return r;
4344 }
4345
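/*
 * GPR half of the EDC workarounds: build one indirect buffer that runs a
 * VGPR init shader and two SGPR init shaders across all shader engines so
 * the register files are initialized, then submit it and wait for the
 * fence.  Skipped unless GFX RAS is supported and the compute ring is up.
 */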
4346 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4347 {
4348 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4349 struct amdgpu_ib ib;
4350 struct dma_fence *f = NULL;
4351 int r, i;
4352 unsigned total_size, vgpr_offset, sgpr_offset;
4353 u64 gpu_addr;
4354
4355 int compute_dim_x = adev->gfx.config.max_shader_engines *
4356 adev->gfx.config.max_cu_per_sh *
4357 adev->gfx.config.max_sh_per_se;
4358 int sgpr_work_group_size = 5;
4359 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4360 int vgpr_init_shader_size;
4361 const u32 *vgpr_init_shader_ptr;
4362 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4363
4364 /* only support when RAS is enabled */
4365 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4366 return 0;
4367
4368 /* bail if the compute ring is not ready */
4369 if (!ring->sched.ready)
4370 return 0;
4371
4372 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4373 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4374 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4375 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4376 } else {
4377 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4378 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4379 vgpr_init_regs_ptr = vgpr_init_regs;
4380 }
4381
4382 total_size =
4383 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4384 total_size +=
4385 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4386 total_size +=
4387 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4388 total_size = ALIGN(total_size, 256);
4389 vgpr_offset = total_size;
4390 total_size += ALIGN(vgpr_init_shader_size, 256);
4391 sgpr_offset = total_size;
4392 total_size += sizeof(sgpr_init_compute_shader);
4393
4394 /* allocate an indirect buffer to put the commands in */
4395 memset(&ib, 0, sizeof(ib));
4396 r = amdgpu_ib_get(adev, NULL, total_size,
4397 AMDGPU_IB_POOL_DIRECT, &ib);
4398 if (r) {
4399 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4400 return r;
4401 }
4402
4403 /* load the compute shaders */
4404 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4405 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4406
4407 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4408 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4409
4410 /* init the ib length to 0 */
4411 ib.length_dw = 0;
4412
4413 /* VGPR */
4414 /* write the register state for the compute dispatch */
4415 for (i = 0; i < gpr_reg_size; i++) {
4416 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4417 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4418 - PACKET3_SET_SH_REG_START;
4419 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4420 }
4421 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4422 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4423 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4424 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4425 - PACKET3_SET_SH_REG_START;
4426 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4427 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4428
4429 /* write dispatch packet */
4430 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4431 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4432 ib.ptr[ib.length_dw++] = 1; /* y */
4433 ib.ptr[ib.length_dw++] = 1; /* z */
4434 ib.ptr[ib.length_dw++] =
4435 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4436
4437 /* write CS partial flush packet */
4438 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4439 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4440
4441 /* SGPR1 */
4442 /* write the register state for the compute dispatch */
4443 for (i = 0; i < gpr_reg_size; i++) {
4444 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4445 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4446 - PACKET3_SET_SH_REG_START;
4447 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4448 }
4449 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4450 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4451 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4452 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4453 - PACKET3_SET_SH_REG_START;
4454 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4455 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4456
4457 /* write dispatch packet */
4458 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4459 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4460 ib.ptr[ib.length_dw++] = 1; /* y */
4461 ib.ptr[ib.length_dw++] = 1; /* z */
4462 ib.ptr[ib.length_dw++] =
4463 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4464
4465 /* write CS partial flush packet */
4466 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4467 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4468
4469 /* SGPR2 */
4470 /* write the register state for the compute dispatch */
4471 for (i = 0; i < gpr_reg_size; i++) {
4472 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4473 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4474 - PACKET3_SET_SH_REG_START;
4475 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4476 }
4477 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4478 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4479 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4480 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4481 - PACKET3_SET_SH_REG_START;
4482 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4483 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4484
4485 /* write dispatch packet */
4486 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4487 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4488 ib.ptr[ib.length_dw++] = 1; /* y */
4489 ib.ptr[ib.length_dw++] = 1; /* z */
4490 ib.ptr[ib.length_dw++] =
4491 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4492
4493 /* write CS partial flush packet */
4494 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4495 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4496
4497 /* schedule the IB on the ring */
4498 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4499 if (r) {
4500 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4501 goto fail;
4502 }
4503
4504 /* wait for the GPU to finish processing the IB */
4505 r = dma_fence_wait(f, false);
4506 if (r) {
4507 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4508 goto fail;
4509 }
4510
4511 fail:
4512 amdgpu_ib_free(adev, &ib, NULL);
4513 dma_fence_put(f);
4514
4515 return r;
4516 }
4517
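/*
 * IP block early init: select the ring counts for this GC version (no gfx
 * rings on 9.4.1/9.4.2), install the kiq/ring/irq/GDS/RLC callbacks, set up
 * RLCG register access, and request the microcode images.
 */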
4518 static int gfx_v9_0_early_init(void *handle)
4519 {
4520 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4521
4522 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4523
4524 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4525 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4526 adev->gfx.num_gfx_rings = 0;
4527 else
4528 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4529 adev->gfx.xcc_mask = 1;
4530 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4531 AMDGPU_MAX_COMPUTE_RINGS);
4532 gfx_v9_0_set_kiq_pm4_funcs(adev);
4533 gfx_v9_0_set_ring_funcs(adev);
4534 gfx_v9_0_set_irq_funcs(adev);
4535 gfx_v9_0_set_gds_init(adev);
4536 gfx_v9_0_set_rlc_funcs(adev);
4537
4538 /* init rlcg reg access ctrl */
4539 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4540
4541 return gfx_v9_0_init_microcode(adev);
4542 }
4543
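/*
 * Late-init ECC handling: run the GDS workaround on cold boot only, then
 * the GPR workaround (which needs the IB pool), and finally arm the RAS
 * watchdog timer when the RAS block provides one.
 */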
4544 static int gfx_v9_0_ecc_late_init(void *handle)
4545 {
4546 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4547 int r;
4548
4549 /*
4550 * Temporary workaround: on several cards the CP firmware fails to
4551 * update the read pointer while CPDMA writes the clearing operation
4552 * to GDS during the suspend/resume sequence, so limit this
4553 * operation to the cold boot sequence.
4554 */
4555 if ((!adev->in_suspend) &&
4556 (adev->gds.gds_size)) {
4557 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4558 if (r)
4559 return r;
4560 }
4561
4562 /* requires IBs so do in late init after IB pool is initialized */
4563 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4564 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4565 else
4566 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4567
4568 if (r)
4569 return r;
4570
4571 if (adev->gfx.ras &&
4572 adev->gfx.ras->enable_watchdog_timer)
4573 adev->gfx.ras->enable_watchdog_timer(adev);
4574
4575 return 0;
4576 }
4577
4578 static int gfx_v9_0_late_init(void *handle)
4579 {
4580 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4581 int r;
4582
4583 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4584 if (r)
4585 return r;
4586
4587 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4588 if (r)
4589 return r;
4590
4591 r = gfx_v9_0_ecc_late_init(handle);
4592 if (r)
4593 return r;
4594
4595 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4596 gfx_v9_4_2_debug_trap_config_init(adev,
4597 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4598 else
4599 gfx_v9_0_debug_trap_config_init(adev,
4600 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4601
4602 return 0;
4603 }
4604
4605 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4606 {
4607 uint32_t rlc_setting;
4608
4609 /* if RLC is not enabled, do nothing */
4610 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4611 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4612 return false;
4613
4614 return true;
4615 }
4616
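/*
 * Ask the RLC to enter safe mode: write the CMD/MESSAGE handshake into
 * mmRLC_SAFE_MODE and poll until the RLC acknowledges by clearing CMD.
 */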
4617 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4618 {
4619 uint32_t data;
4620 unsigned i;
4621
4622 data = RLC_SAFE_MODE__CMD_MASK;
4623 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4624 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4625
4626 /* wait for RLC_SAFE_MODE */
4627 for (i = 0; i < adev->usec_timeout; i++) {
4628 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4629 break;
4630 udelay(1);
4631 }
4632 }
4633
4634 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4635 {
4636 uint32_t data;
4637
4638 data = RLC_SAFE_MODE__CMD_MASK;
4639 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4640 }
4641
4642 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4643 bool enable)
4644 {
4645 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4646
4647 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4648 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4649 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4650 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4651 } else {
4652 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4653 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4654 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4655 }
4656
4657 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4658 }
4659
4660 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4661 bool enable)
4662 {
4663 /* TODO: double check if we need to perform under safe mode */
4664 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4665
4666 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4667 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4668 else
4669 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4670
4671 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4672 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4673 else
4674 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4675
4676 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4677 }
4678
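/*
 * Medium grain clock gating: under RLC safe mode, clear or set the
 * RLC_CGTT_MGCG_OVERRIDE bits and toggle RLC/CP memory light sleep
 * depending on the requested state and the cg_flags.
 */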
4679 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4680 bool enable)
4681 {
4682 uint32_t data, def;
4683
4684 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4685
4686 /* It is disabled by HW by default */
4687 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4688 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4689 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4690
4691 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4692 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4693
4694 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4695 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4696 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4697
4698 /* only for Vega10 & Raven1 */
4699 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4700
4701 if (def != data)
4702 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4703
4704 /* MGLS is a global flag to control all MGLS in GFX */
4705 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4706 /* 2 - RLC memory Light sleep */
4707 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4708 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4709 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4710 if (def != data)
4711 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4712 }
4713 /* 3 - CP memory Light sleep */
4714 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4715 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4716 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4717 if (def != data)
4718 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4719 }
4720 }
4721 } else {
4722 /* 1 - MGCG_OVERRIDE */
4723 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4724
4725 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4726 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4727
4728 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4729 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4730 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4731 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4732
4733 if (def != data)
4734 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4735
4736 /* 2 - disable MGLS in RLC */
4737 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4738 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4739 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4740 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4741 }
4742
4743 /* 3 - disable MGLS in CP */
4744 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4745 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4746 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4747 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4748 }
4749 }
4750
4751 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4752 }
4753
4754 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4755 bool enable)
4756 {
4757 uint32_t data, def;
4758
4759 if (!adev->gfx.num_gfx_rings)
4760 return;
4761
4762 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4763
4764 /* Enable 3D CGCG/CGLS */
4765 if (enable) {
4766 /* write cmd to clear cgcg/cgls ov */
4767 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4768 /* unset CGCG override */
4769 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4770 /* update CGCG and CGLS override bits */
4771 if (def != data)
4772 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4773
4774 /* enable 3Dcgcg FSM(0x0000363f) */
4775 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4776
4777 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4778 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4779 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4780 else
4781 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4782
4783 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4784 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4785 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4786 if (def != data)
4787 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4788
4789 /* set IDLE_POLL_COUNT(0x00900100) */
4790 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4791 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4792 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4793 if (def != data)
4794 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4795 } else {
4796 /* Disable CGCG/CGLS */
4797 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4798 /* disable cgcg, cgls should be disabled */
4799 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4800 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4801 /* disable cgcg and cgls in FSM */
4802 if (def != data)
4803 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4804 }
4805
4806 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4807 }
4808
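/*
 * Coarse grain clock gating: clear the CGCG/CGLS overrides and program the
 * CGCG FSM idle threshold when enabling, or clear the FSM enable bits when
 * disabling; all done under RLC safe mode.
 */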
4809 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4810 bool enable)
4811 {
4812 uint32_t def, data;
4813
4814 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4815
4816 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4817 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4818 /* unset CGCG override */
4819 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4820 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4821 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4822 else
4823 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4824 /* update CGCG and CGLS override bits */
4825 if (def != data)
4826 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4827
4828 /* enable cgcg FSM(0x0000363F) */
4829 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4830
4831 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4832 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4833 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4834 else
4835 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4836 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4837 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4838 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4839 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4840 if (def != data)
4841 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4842
4843 /* set IDLE_POLL_COUNT(0x00900100) */
4844 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4845 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4846 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4847 if (def != data)
4848 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4849 } else {
4850 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4851 /* reset CGCG/CGLS bits */
4852 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4853 /* disable cgcg and cgls in FSM */
4854 if (def != data)
4855 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4856 }
4857
4858 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4859 }
4860
4861 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4862 bool enable)
4863 {
4864 if (enable) {
4865 /* CGCG/CGLS should be enabled after MGCG/MGLS
4866 * === MGCG + MGLS ===
4867 */
4868 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4869 /* === CGCG /CGLS for GFX 3D Only === */
4870 gfx_v9_0_update_3d_clock_gating(adev, enable);
4871 /* === CGCG + CGLS === */
4872 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4873 } else {
4874 /* CGCG/CGLS should be disabled before MGCG/MGLS
4875 * === CGCG + CGLS ===
4876 */
4877 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4878 /* === CGCG /CGLS for GFX 3D Only === */
4879 gfx_v9_0_update_3d_clock_gating(adev, enable);
4880 /* === MGCG + MGLS === */
4881 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4882 }
4883 return 0;
4884 }
4885
4886 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
4887 unsigned int vmid)
4888 {
4889 u32 reg, data;
4890
4891 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4892 if (amdgpu_sriov_is_pp_one_vf(adev))
4893 data = RREG32_NO_KIQ(reg);
4894 else
4895 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4896
4897 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4898 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4899
4900 if (amdgpu_sriov_is_pp_one_vf(adev))
4901 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4902 else
4903 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4904 }
4905
4906 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
4907 {
4908 amdgpu_gfx_off_ctrl(adev, false);
4909
4910 gfx_v9_0_update_spm_vmid_internal(adev, vmid);
4911
4912 amdgpu_gfx_off_ctrl(adev, true);
4913 }
4914
4915 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4916 uint32_t offset,
4917 struct soc15_reg_rlcg *entries, int arr_size)
4918 {
4919 int i;
4920 uint32_t reg;
4921
4922 if (!entries)
4923 return false;
4924
4925 for (i = 0; i < arr_size; i++) {
4926 const struct soc15_reg_rlcg *entry;
4927
4928 entry = &entries[i];
4929 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4930 if (offset == reg)
4931 return true;
4932 }
4933
4934 return false;
4935 }
4936
4937 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4938 {
4939 return gfx_v9_0_check_rlcg_range(adev, offset,
4940 (void *)rlcg_access_gc_9_0,
4941 ARRAY_SIZE(rlcg_access_gc_9_0));
4942 }
4943
4944 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4945 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4946 .set_safe_mode = gfx_v9_0_set_safe_mode,
4947 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4948 .init = gfx_v9_0_rlc_init,
4949 .get_csb_size = gfx_v9_0_get_csb_size,
4950 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4951 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4952 .resume = gfx_v9_0_rlc_resume,
4953 .stop = gfx_v9_0_rlc_stop,
4954 .reset = gfx_v9_0_rlc_reset,
4955 .start = gfx_v9_0_rlc_start,
4956 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
4957 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4958 };
4959
4960 static int gfx_v9_0_set_powergating_state(void *handle,
4961 enum amd_powergating_state state)
4962 {
4963 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4964 bool enable = (state == AMD_PG_STATE_GATE);
4965
4966 switch (adev->ip_versions[GC_HWIP][0]) {
4967 case IP_VERSION(9, 2, 2):
4968 case IP_VERSION(9, 1, 0):
4969 case IP_VERSION(9, 3, 0):
4970 if (!enable)
4971 amdgpu_gfx_off_ctrl(adev, false);
4972
4973 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4974 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4975 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4976 } else {
4977 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4978 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4979 }
4980
4981 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4982 gfx_v9_0_enable_cp_power_gating(adev, true);
4983 else
4984 gfx_v9_0_enable_cp_power_gating(adev, false);
4985
4986 /* update gfx cgpg state */
4987 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4988
4989 /* update mgcg state */
4990 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4991
4992 if (enable)
4993 amdgpu_gfx_off_ctrl(adev, true);
4994 break;
4995 case IP_VERSION(9, 2, 1):
4996 amdgpu_gfx_off_ctrl(adev, enable);
4997 break;
4998 default:
4999 break;
5000 }
5001
5002 return 0;
5003 }
5004
5005 static int gfx_v9_0_set_clockgating_state(void *handle,
5006 enum amd_clockgating_state state)
5007 {
5008 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5009
5010 if (amdgpu_sriov_vf(adev))
5011 return 0;
5012
5013 switch (adev->ip_versions[GC_HWIP][0]) {
5014 case IP_VERSION(9, 0, 1):
5015 case IP_VERSION(9, 2, 1):
5016 case IP_VERSION(9, 4, 0):
5017 case IP_VERSION(9, 2, 2):
5018 case IP_VERSION(9, 1, 0):
5019 case IP_VERSION(9, 4, 1):
5020 case IP_VERSION(9, 3, 0):
5021 case IP_VERSION(9, 4, 2):
5022 gfx_v9_0_update_gfx_clock_gating(adev,
5023 state == AMD_CG_STATE_GATE);
5024 break;
5025 default:
5026 break;
5027 }
5028 return 0;
5029 }
5030
5031 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5032 {
5033 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5034 int data;
5035
5036 if (amdgpu_sriov_vf(adev))
5037 *flags = 0;
5038
5039 /* AMD_CG_SUPPORT_GFX_MGCG */
5040 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5041 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5042 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5043
5044 /* AMD_CG_SUPPORT_GFX_CGCG */
5045 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5046 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5047 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5048
5049 /* AMD_CG_SUPPORT_GFX_CGLS */
5050 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5051 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5052
5053 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5054 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5055 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5056 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5057
5058 /* AMD_CG_SUPPORT_GFX_CP_LS */
5059 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5060 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5061 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5062
5063 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5064 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5065 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5066 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5067 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5068
5069 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5070 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5071 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5072 }
5073 }
5074
5075 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5076 {
5077 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5078 }
5079
5080 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5081 {
5082 struct amdgpu_device *adev = ring->adev;
5083 u64 wptr;
5084
5085 /* XXX check if swapping is necessary on BE */
5086 if (ring->use_doorbell) {
5087 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5088 } else {
5089 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5090 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5091 }
5092
5093 return wptr;
5094 }
5095
5096 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5097 {
5098 struct amdgpu_device *adev = ring->adev;
5099
5100 if (ring->use_doorbell) {
5101 /* XXX check if swapping is necessary on BE */
5102 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5103 WDOORBELL64(ring->doorbell_index, ring->wptr);
5104 } else {
5105 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5106 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5107 }
5108 }
5109
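/*
 * HDP flush: emit a WAIT_REG_MEM against the NBIO HDP flush request/done
 * registers, using the per-ME/pipe ref_and_mask for compute rings and the
 * PFP engine for the gfx ring.
 */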
5110 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5111 {
5112 struct amdgpu_device *adev = ring->adev;
5113 u32 ref_and_mask, reg_mem_engine;
5114 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5115
5116 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5117 switch (ring->me) {
5118 case 1:
5119 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5120 break;
5121 case 2:
5122 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5123 break;
5124 default:
5125 return;
5126 }
5127 reg_mem_engine = 0;
5128 } else {
5129 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5130 reg_mem_engine = 1; /* pfp */
5131 }
5132
5133 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5134 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5135 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5136 ref_and_mask, ref_and_mask, 0x20);
5137 }
5138
5139 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5140 struct amdgpu_job *job,
5141 struct amdgpu_ib *ib,
5142 uint32_t flags)
5143 {
5144 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5145 u32 header, control = 0;
5146
5147 if (ib->flags & AMDGPU_IB_FLAG_CE)
5148 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5149 else
5150 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5151
5152 control |= ib->length_dw | (vmid << 24);
5153
5154 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5155 control |= INDIRECT_BUFFER_PRE_ENB(1);
5156
5157 if (flags & AMDGPU_IB_PREEMPTED)
5158 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5159
5160 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5161 gfx_v9_0_ring_emit_de_meta(ring,
5162 (!amdgpu_sriov_vf(ring->adev) &&
5163 flags & AMDGPU_IB_PREEMPTED) ?
5164 true : false,
5165 job->gds_size > 0 && job->gds_base != 0);
5166 }
5167
5168 amdgpu_ring_write(ring, header);
5169 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5170 amdgpu_ring_write(ring,
5171 #ifdef __BIG_ENDIAN
5172 (2 << 0) |
5173 #endif
5174 lower_32_bits(ib->gpu_addr));
5175 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5176 amdgpu_ring_ib_on_emit_cntl(ring);
5177 amdgpu_ring_write(ring, control);
5178 }
5179
5180 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5181 unsigned offset)
5182 {
5183 u32 control = ring->ring[offset];
5184
5185 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5186 ring->ring[offset] = control;
5187 }
5188
5189 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5190 unsigned offset)
5191 {
5192 struct amdgpu_device *adev = ring->adev;
5193 void *ce_payload_cpu_addr;
5194 uint64_t payload_offset, payload_size;
5195
5196 payload_size = sizeof(struct v9_ce_ib_state);
5197
5198 if (ring->is_mes_queue) {
5199 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5200 gfx[0].gfx_meta_data) +
5201 offsetof(struct v9_gfx_meta_data, ce_payload);
5202 ce_payload_cpu_addr =
5203 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5204 } else {
5205 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5206 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5207 }
5208
5209 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5210 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5211 } else {
5212 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5213 (ring->buf_mask + 1 - offset) << 2);
5214 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5215 memcpy((void *)&ring->ring[0],
5216 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5217 payload_size);
5218 }
5219 }
5220
5221 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5222 unsigned offset)
5223 {
5224 struct amdgpu_device *adev = ring->adev;
5225 void *de_payload_cpu_addr;
5226 uint64_t payload_offset, payload_size;
5227
5228 payload_size = sizeof(struct v9_de_ib_state);
5229
5230 if (ring->is_mes_queue) {
5231 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5232 gfx[0].gfx_meta_data) +
5233 offsetof(struct v9_gfx_meta_data, de_payload);
5234 de_payload_cpu_addr =
5235 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5236 } else {
5237 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5238 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5239 }
5240
5241 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5242 IB_COMPLETION_STATUS_PREEMPTED;
5243
5244 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5245 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5246 } else {
5247 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5248 (ring->buf_mask + 1 - offset) << 2);
5249 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5250 memcpy((void *)&ring->ring[0],
5251 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5252 payload_size);
5253 }
5254 }
5255
5256 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5257 struct amdgpu_job *job,
5258 struct amdgpu_ib *ib,
5259 uint32_t flags)
5260 {
5261 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5262 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5263
5264 /* Currently, there is a high possibility to get wave ID mismatch
5265 * between ME and GDS, leading to a hw deadlock, because ME generates
5266 * different wave IDs than the GDS expects. This situation happens
5267 * randomly when at least 5 compute pipes use GDS ordered append.
5268 * The wave IDs generated by ME are also wrong after suspend/resume.
5269 * Those are probably bugs somewhere else in the kernel driver.
5270 *
5271 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5272 * GDS to 0 for this ring (me/pipe).
5273 */
5274 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5275 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5276 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5277 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5278 }
5279
5280 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5281 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5282 amdgpu_ring_write(ring,
5283 #ifdef __BIG_ENDIAN
5284 (2 << 0) |
5285 #endif
5286 lower_32_bits(ib->gpu_addr));
5287 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5288 amdgpu_ring_write(ring, control);
5289 }
5290
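/*
 * Fence emission via RELEASE_MEM: flush/invalidate the TC caches (or write
 * back only for TC_WB_ONLY fences), then write a 32- or 64-bit sequence
 * number and optionally raise an interrupt.
 */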
5291 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5292 u64 seq, unsigned flags)
5293 {
5294 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5295 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5296 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5297 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5298 uint32_t dw2 = 0;
5299
5300 /* RELEASE_MEM - flush caches, send int */
5301 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5302
5303 if (writeback) {
5304 dw2 = EOP_TC_NC_ACTION_EN;
5305 } else {
5306 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5307 EOP_TC_MD_ACTION_EN;
5308 }
5309 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5310 EVENT_INDEX(5);
5311 if (exec)
5312 dw2 |= EOP_EXEC;
5313
5314 amdgpu_ring_write(ring, dw2);
5315 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5316
5317 /*
5318 * the address should be Qword aligned for a 64-bit write, and Dword
5319 * aligned if only the low 32 bits of data are written (data high is discarded)
5320 */
5321 if (write64bit)
5322 BUG_ON(addr & 0x7);
5323 else
5324 BUG_ON(addr & 0x3);
5325 amdgpu_ring_write(ring, lower_32_bits(addr));
5326 amdgpu_ring_write(ring, upper_32_bits(addr));
5327 amdgpu_ring_write(ring, lower_32_bits(seq));
5328 amdgpu_ring_write(ring, upper_32_bits(seq));
5329 amdgpu_ring_write(ring, 0);
5330 }
5331
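/*
 * Pipeline sync: wait on this ring's fence address until the most recently
 * synced sequence number becomes visible before letting later work run.
 */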
5332 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5333 {
5334 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5335 uint32_t seq = ring->fence_drv.sync_seq;
5336 uint64_t addr = ring->fence_drv.gpu_addr;
5337
5338 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5339 lower_32_bits(addr), upper_32_bits(addr),
5340 seq, 0xffffffff, 4);
5341 }
5342
5343 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5344 unsigned vmid, uint64_t pd_addr)
5345 {
5346 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5347
5348 /* compute doesn't have PFP */
5349 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5350 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5351 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5352 amdgpu_ring_write(ring, 0x0);
5353 }
5354 }
5355
5356 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5357 {
5358 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5359 }
5360
5361 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5362 {
5363 u64 wptr;
5364
5365 /* XXX check if swapping is necessary on BE */
5366 if (ring->use_doorbell)
5367 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5368 else
5369 BUG();
5370 return wptr;
5371 }
5372
5373 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5374 {
5375 struct amdgpu_device *adev = ring->adev;
5376
5377 /* XXX check if swapping is necessary on BE */
5378 if (ring->use_doorbell) {
5379 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5380 WDOORBELL64(ring->doorbell_index, ring->wptr);
5381 } else {
5382 BUG(); /* only DOORBELL method supported on gfx9 now */
5383 }
5384 }
5385
5386 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5387 u64 seq, unsigned int flags)
5388 {
5389 struct amdgpu_device *adev = ring->adev;
5390
5391 /* we only allocate 32bit for each seq wb address */
5392 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5393
5394 /* write fence seq to the "addr" */
5395 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5396 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5397 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5398 amdgpu_ring_write(ring, lower_32_bits(addr));
5399 amdgpu_ring_write(ring, upper_32_bits(addr));
5400 amdgpu_ring_write(ring, lower_32_bits(seq));
5401
5402 if (flags & AMDGPU_FENCE_FLAG_INT) {
5403 /* set register to trigger INT */
5404 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5405 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5406 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5407 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5408 amdgpu_ring_write(ring, 0);
5409 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5410 }
5411 }
5412
5413 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5414 {
5415 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5416 amdgpu_ring_write(ring, 0);
5417 }
5418
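/*
 * Write the CE metadata (v9_ce_ib_state) into the CSA with a WRITE_DATA
 * packet; on a preemption resume the previously saved payload is replayed
 * instead of a zero-initialized one.
 */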
5419 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5420 {
5421 struct amdgpu_device *adev = ring->adev;
5422 struct v9_ce_ib_state ce_payload = {0};
5423 uint64_t offset, ce_payload_gpu_addr;
5424 void *ce_payload_cpu_addr;
5425 int cnt;
5426
5427 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5428
5429 if (ring->is_mes_queue) {
5430 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5431 gfx[0].gfx_meta_data) +
5432 offsetof(struct v9_gfx_meta_data, ce_payload);
5433 ce_payload_gpu_addr =
5434 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5435 ce_payload_cpu_addr =
5436 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5437 } else {
5438 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5439 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5440 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5441 }
5442
5443 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5444 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5445 WRITE_DATA_DST_SEL(8) |
5446 WR_CONFIRM) |
5447 WRITE_DATA_CACHE_POLICY(0));
5448 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5449 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5450
5451 amdgpu_ring_ib_on_emit_ce(ring);
5452
5453 if (resume)
5454 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5455 sizeof(ce_payload) >> 2);
5456 else
5457 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5458 sizeof(ce_payload) >> 2);
5459 }
5460
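/*
 * Mid-command-buffer preemption: disarm the ring's conditional execution,
 * emit a trailing fence, ask the KIQ to preempt the queue without unmapping
 * it, poll for the trailing fence, then clear CP_VMID_PREEMPT and re-arm
 * the COND_EXEC.
 */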
5461 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5462 {
5463 int i, r = 0;
5464 struct amdgpu_device *adev = ring->adev;
5465 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5466 struct amdgpu_ring *kiq_ring = &kiq->ring;
5467 unsigned long flags;
5468
5469 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5470 return -EINVAL;
5471
5472 spin_lock_irqsave(&kiq->ring_lock, flags);
5473
5474 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5475 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5476 return -ENOMEM;
5477 }
5478
5479 /* assert preemption condition */
5480 amdgpu_ring_set_preempt_cond_exec(ring, false);
5481
5482 ring->trail_seq += 1;
5483 amdgpu_ring_alloc(ring, 13);
5484 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5485 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5486
5487 /* assert IB preemption, emit the trailing fence */
5488 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5489 ring->trail_fence_gpu_addr,
5490 ring->trail_seq);
5491
5492 amdgpu_ring_commit(kiq_ring);
5493 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5494
5495 /* poll the trailing fence */
5496 for (i = 0; i < adev->usec_timeout; i++) {
5497 if (ring->trail_seq ==
5498 le32_to_cpu(*ring->trail_fence_cpu_addr))
5499 break;
5500 udelay(1);
5501 }
5502
5503 if (i >= adev->usec_timeout) {
5504 r = -EINVAL;
5505 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5506 }
5507
5508 /* reset the CP_VMID_PREEMPT after trailing fence */
5509 amdgpu_ring_emit_wreg(ring,
5510 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5511 0x0);
5512 amdgpu_ring_commit(ring);
5513
5514 /* deassert preemption condition */
5515 amdgpu_ring_set_preempt_cond_exec(ring, true);
5516 return r;
5517 }
5518
5519 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5520 {
5521 struct amdgpu_device *adev = ring->adev;
5522 struct v9_de_ib_state de_payload = {0};
5523 uint64_t offset, gds_addr, de_payload_gpu_addr;
5524 void *de_payload_cpu_addr;
5525 int cnt;
5526
5527 if (ring->is_mes_queue) {
5528 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5529 gfx[0].gfx_meta_data) +
5530 offsetof(struct v9_gfx_meta_data, de_payload);
5531 de_payload_gpu_addr =
5532 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5533 de_payload_cpu_addr =
5534 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5535
5536 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5537 gfx[0].gds_backup) +
5538 offsetof(struct v9_gfx_meta_data, de_payload);
5539 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5540 } else {
5541 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5542 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5543 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5544
5545 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5546 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5547 PAGE_SIZE);
5548 }
5549
5550 if (usegds) {
5551 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5552 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5553 }
5554
5555 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5556 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5557 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5558 WRITE_DATA_DST_SEL(8) |
5559 WR_CONFIRM) |
5560 WRITE_DATA_CACHE_POLICY(0));
5561 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5562 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5563
5564 amdgpu_ring_ib_on_emit_de(ring);
5565 if (resume)
5566 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5567 sizeof(de_payload) >> 2);
5568 else
5569 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5570 sizeof(de_payload) >> 2);
5571 }
5572
5573 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5574 bool secure)
5575 {
5576 uint32_t v = secure ? FRAME_TMZ : 0;
5577
5578 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5579 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5580 }
5581
5582 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5583 {
5584 uint32_t dw2 = 0;
5585
5586 gfx_v9_0_ring_emit_ce_meta(ring,
5587 (!amdgpu_sriov_vf(ring->adev) &&
5588 flags & AMDGPU_IB_PREEMPTED) ? true : false);
5589
5590 dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5591 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5592 /* set load_global_config & load_global_uconfig */
5593 dw2 |= 0x8001;
5594 /* set load_cs_sh_regs */
5595 dw2 |= 0x01000000;
5596 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5597 dw2 |= 0x10002;
5598
5599 /* set load_ce_ram if preamble presented */
5600 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5601 dw2 |= 0x10000000;
5602 } else {
5603 /* still load_ce_ram if this is the first time the preamble is presented,
5604 * even though no context switch happens.
5605 */
5606 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5607 dw2 |= 0x10000000;
5608 }
5609
5610 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5611 amdgpu_ring_write(ring, dw2);
5612 amdgpu_ring_write(ring, 0);
5613 }
5614
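/*
 * COND_EXEC support: emit_init_cond_exec writes a COND_EXEC packet with a
 * dummy size and returns its offset; patch_cond_exec later rewrites that
 * dword with the number of dwords to skip when *cond_exe_gpu_addr is zero.
 */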
5615 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5616 {
5617 unsigned ret;
5618 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5619 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5620 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5621 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5622 ret = ring->wptr & ring->buf_mask;
5623 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5624 return ret;
5625 }
5626
5627 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5628 {
5629 unsigned cur;
5630 BUG_ON(offset > ring->buf_mask);
5631 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5632
5633 cur = (ring->wptr - 1) & ring->buf_mask;
5634 if (likely(cur > offset))
5635 ring->ring[offset] = cur - offset;
5636 else
5637 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5638 }
5639
5640 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5641 uint32_t reg_val_offs)
5642 {
5643 struct amdgpu_device *adev = ring->adev;
5644
5645 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5646 amdgpu_ring_write(ring, 0 | /* src: register*/
5647 (5 << 8) | /* dst: memory */
5648 (1 << 20)); /* write confirm */
5649 amdgpu_ring_write(ring, reg);
5650 amdgpu_ring_write(ring, 0);
5651 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5652 reg_val_offs * 4));
5653 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5654 reg_val_offs * 4));
5655 }
5656
5657 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5658 uint32_t val)
5659 {
5660 uint32_t cmd = 0;
5661
5662 switch (ring->funcs->type) {
5663 case AMDGPU_RING_TYPE_GFX:
5664 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5665 break;
5666 case AMDGPU_RING_TYPE_KIQ:
5667 cmd = (1 << 16); /* no inc addr */
5668 break;
5669 default:
5670 cmd = WR_CONFIRM;
5671 break;
5672 }
5673 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5674 amdgpu_ring_write(ring, cmd);
5675 amdgpu_ring_write(ring, reg);
5676 amdgpu_ring_write(ring, 0);
5677 amdgpu_ring_write(ring, val);
5678 }
5679
5680 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5681 uint32_t val, uint32_t mask)
5682 {
5683 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5684 }
5685
5686 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5687 uint32_t reg0, uint32_t reg1,
5688 uint32_t ref, uint32_t mask)
5689 {
5690 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5691 struct amdgpu_device *adev = ring->adev;
5692 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5693 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5694
5695 if (fw_version_ok)
5696 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5697 ref, mask, 0x20);
5698 else
5699 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5700 ref, mask);
5701 }
5702
5703 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5704 {
5705 struct amdgpu_device *adev = ring->adev;
5706 uint32_t value = 0;
5707
5708 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5709 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5710 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5711 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5712 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5713 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5714 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5715 }
5716
5717 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5718 enum amdgpu_interrupt_state state)
5719 {
5720 switch (state) {
5721 case AMDGPU_IRQ_STATE_DISABLE:
5722 case AMDGPU_IRQ_STATE_ENABLE:
5723 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5724 TIME_STAMP_INT_ENABLE,
5725 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5726 break;
5727 default:
5728 break;
5729 }
5730 }
5731
5732 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5733 int me, int pipe,
5734 enum amdgpu_interrupt_state state)
5735 {
5736 u32 mec_int_cntl, mec_int_cntl_reg;
5737
5738 /*
5739 * amdgpu controls only the first MEC. That's why this function only
5740 * handles the setting of interrupts for this specific MEC. All other
5741 * pipes' interrupts are set by amdkfd.
5742 */
5743
5744 if (me == 1) {
5745 switch (pipe) {
5746 case 0:
5747 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5748 break;
5749 case 1:
5750 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5751 break;
5752 case 2:
5753 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5754 break;
5755 case 3:
5756 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5757 break;
5758 default:
5759 DRM_DEBUG("invalid pipe %d\n", pipe);
5760 return;
5761 }
5762 } else {
5763 DRM_DEBUG("invalid me %d\n", me);
5764 return;
5765 }
5766
5767 switch (state) {
5768 case AMDGPU_IRQ_STATE_DISABLE:
5769 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5770 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5771 TIME_STAMP_INT_ENABLE, 0);
5772 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5773 break;
5774 case AMDGPU_IRQ_STATE_ENABLE:
5775 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5776 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5777 TIME_STAMP_INT_ENABLE, 1);
5778 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5779 break;
5780 default:
5781 break;
5782 }
5783 }
5784
5785 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5786 struct amdgpu_irq_src *source,
5787 unsigned type,
5788 enum amdgpu_interrupt_state state)
5789 {
5790 switch (state) {
5791 case AMDGPU_IRQ_STATE_DISABLE:
5792 case AMDGPU_IRQ_STATE_ENABLE:
5793 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5794 PRIV_REG_INT_ENABLE,
5795 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5796 break;
5797 default:
5798 break;
5799 }
5800
5801 return 0;
5802 }
5803
5804 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5805 struct amdgpu_irq_src *source,
5806 unsigned type,
5807 enum amdgpu_interrupt_state state)
5808 {
5809 switch (state) {
5810 case AMDGPU_IRQ_STATE_DISABLE:
5811 case AMDGPU_IRQ_STATE_ENABLE:
5812 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5813 PRIV_INSTR_INT_ENABLE,
5814 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5815 break;
5816 default:
5817 break;
5818 }
5819
5820 return 0;
5821 }
5822
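/*
 * Helper macros to toggle the CP_ECC_ERROR_INT_ENABLE bit in a per-pipe
 * interrupt control register; token pasting builds the register name, e.g.
 * ENABLE_ECC_ON_ME_PIPE(1, 0) writes the field in CP_ME1_PIPE0_INT_CNTL.
 */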
5823 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5824 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5825 CP_ECC_ERROR_INT_ENABLE, 1)
5826
5827 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5828 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5829 CP_ECC_ERROR_INT_ENABLE, 0)
5830
5831 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5832 struct amdgpu_irq_src *source,
5833 unsigned type,
5834 enum amdgpu_interrupt_state state)
5835 {
5836 switch (state) {
5837 case AMDGPU_IRQ_STATE_DISABLE:
5838 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5839 CP_ECC_ERROR_INT_ENABLE, 0);
5840 DISABLE_ECC_ON_ME_PIPE(1, 0);
5841 DISABLE_ECC_ON_ME_PIPE(1, 1);
5842 DISABLE_ECC_ON_ME_PIPE(1, 2);
5843 DISABLE_ECC_ON_ME_PIPE(1, 3);
5844 break;
5845
5846 case AMDGPU_IRQ_STATE_ENABLE:
5847 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5848 CP_ECC_ERROR_INT_ENABLE, 1);
5849 ENABLE_ECC_ON_ME_PIPE(1, 0);
5850 ENABLE_ECC_ON_ME_PIPE(1, 1);
5851 ENABLE_ECC_ON_ME_PIPE(1, 2);
5852 ENABLE_ECC_ON_ME_PIPE(1, 3);
5853 break;
5854 default:
5855 break;
5856 }
5857
5858 return 0;
5859 }
5860
5861
5862 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5863 struct amdgpu_irq_src *src,
5864 unsigned type,
5865 enum amdgpu_interrupt_state state)
5866 {
5867 switch (type) {
5868 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5869 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5870 break;
5871 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5872 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5873 break;
5874 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5875 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5876 break;
5877 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5878 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5879 break;
5880 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5881 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5882 break;
5883 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5884 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5885 break;
5886 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5887 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5888 break;
5889 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5890 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5891 break;
5892 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5893 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5894 break;
5895 default:
5896 break;
5897 }
5898 return 0;
5899 }
5900
5901 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5902 struct amdgpu_irq_src *source,
5903 struct amdgpu_iv_entry *entry)
5904 {
5905 int i;
5906 u8 me_id, pipe_id, queue_id;
5907 struct amdgpu_ring *ring;
5908
5909 DRM_DEBUG("IH: CP EOP\n");
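	/*
	 * Decode the IV ring_id; as used below, bits [1:0] hold the pipe,
	 * bits [3:2] the ME and bits [6:4] the queue.
	 */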
5910 me_id = (entry->ring_id & 0x0c) >> 2;
5911 pipe_id = (entry->ring_id & 0x03) >> 0;
5912 queue_id = (entry->ring_id & 0x70) >> 4;
5913
5914 switch (me_id) {
5915 case 0:
5916 if (adev->gfx.num_gfx_rings &&
5917 !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5918 /* Fence signals are handled on the software rings */
5919 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5920 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5921 }
5922 break;
5923 case 1:
5924 case 2:
5925 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5926 ring = &adev->gfx.compute_ring[i];
5927 /* Per-queue interrupt is supported for MEC starting from VI.
5928 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5929 */
5930 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5931 amdgpu_fence_process(ring);
5932 }
5933 break;
5934 }
5935 return 0;
5936 }
5937
5938 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5939 struct amdgpu_iv_entry *entry)
5940 {
5941 u8 me_id, pipe_id, queue_id;
5942 struct amdgpu_ring *ring;
5943 int i;
5944
5945 me_id = (entry->ring_id & 0x0c) >> 2;
5946 pipe_id = (entry->ring_id & 0x03) >> 0;
5947 queue_id = (entry->ring_id & 0x70) >> 4;
5948
5949 switch (me_id) {
5950 case 0:
5951 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5952 break;
5953 case 1:
5954 case 2:
5955 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5956 ring = &adev->gfx.compute_ring[i];
5957 if (ring->me == me_id && ring->pipe == pipe_id &&
5958 ring->queue == queue_id)
5959 drm_sched_fault(&ring->sched);
5960 }
5961 break;
5962 }
5963 }
5964
5965 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5966 struct amdgpu_irq_src *source,
5967 struct amdgpu_iv_entry *entry)
5968 {
5969 DRM_ERROR("Illegal register access in command stream\n");
5970 gfx_v9_0_fault(adev, entry);
5971 return 0;
5972 }
5973
5974 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5975 struct amdgpu_irq_src *source,
5976 struct amdgpu_iv_entry *entry)
5977 {
5978 DRM_ERROR("Illegal instruction in command stream\n");
5979 gfx_v9_0_fault(adev, entry);
5980 return 0;
5981 }
5982
5983
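/*
 * Table of GFX sub-blocks with EDC counters: each entry names the counter
 * register and the SEC/DED count fields within it (a zero DED mask means the
 * block only reports single-error-detect counts).  gfx_v9_0_ras_error_count()
 * matches entries against a register by offset, segment and instance.
 */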
5984 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5985 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5986 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5987 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5988 },
5989 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5990 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5991 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5992 },
5993 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5994 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5995 0, 0
5996 },
5997 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5998 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5999 0, 0
6000 },
6001 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6002 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6003 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6004 },
6005 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6006 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6007 0, 0
6008 },
6009 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6010 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6011 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6012 },
6013 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6014 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6015 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6016 },
6017 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6018 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6019 0, 0
6020 },
6021 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6022 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6023 0, 0
6024 },
6025 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6026 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6027 0, 0
6028 },
6029 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6030 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6031 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6032 },
6033 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6034 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6035 0, 0
6036 },
6037 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6038 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6039 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6040 },
6041 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6042 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6043 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6044 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6045 },
6046 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6047 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6048 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6049 0, 0
6050 },
6051 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6052 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6053 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6054 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6055 },
6056 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6057 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6058 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6059 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6060 },
6061 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6062 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6063 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6064 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6065 },
6066 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6067 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6068 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6069 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6070 },
6071 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6072 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6073 0, 0
6074 },
6075 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6076 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6077 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6078 },
6079 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6080 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6081 0, 0
6082 },
6083 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6084 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6085 0, 0
6086 },
6087 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6088 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6089 0, 0
6090 },
6091 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6092 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6093 0, 0
6094 },
6095 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6096 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6097 0, 0
6098 },
6099 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6100 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6101 0, 0
6102 },
6103 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6104 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6105 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6106 },
6107 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6108 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6109 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6110 },
6111 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6112 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6113 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6114 },
6115 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6116 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6117 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6118 },
6119 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6120 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6121 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6122 },
6123 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6124 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6125 0, 0
6126 },
6127 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6128 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6129 0, 0
6130 },
6131 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6132 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6133 0, 0
6134 },
6135 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6136 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6137 0, 0
6138 },
6139 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6140 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6141 0, 0
6142 },
6143 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6144 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6145 0, 0
6146 },
6147 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6148 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6149 0, 0
6150 },
6151 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6152 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6153 0, 0
6154 },
6155 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6156 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6157 0, 0
6158 },
6159 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6160 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6161 0, 0
6162 },
6163 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6164 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6165 0, 0
6166 },
6167 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6168 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6169 0, 0
6170 },
6171 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6172 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6173 0, 0
6174 },
6175 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6176 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6177 0, 0
6178 },
6179 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6180 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6181 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6182 },
6183 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6184 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6185 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6186 },
6187 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6188 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6189 0, 0
6190 },
6191 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6192 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6193 0, 0
6194 },
6195 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6196 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6197 0, 0
6198 },
6199 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6200 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6201 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6202 },
6203 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6204 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6205 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6206 },
6207 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6208 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6209 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6210 },
6211 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6212 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6213 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6214 },
6215 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6216 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6217 0, 0
6218 },
6219 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6220 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6221 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6222 },
6223 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6224 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6225 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6226 },
6227 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6228 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6229 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6230 },
6231 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6232 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6233 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6234 },
6235 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6236 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6237 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6238 },
6239 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6240 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6241 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6242 },
6243 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6244 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6245 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6246 },
6247 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6248 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6249 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6250 },
6251 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6252 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6253 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6254 },
6255 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6256 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6257 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6258 },
6259 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6260 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6261 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6262 },
6263 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6264 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6265 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6266 },
6267 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6268 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6269 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6270 },
6271 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6272 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6273 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6274 },
6275 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6276 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6277 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6278 },
6279 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6280 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6281 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6282 },
6283 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6284 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6285 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6286 },
6287 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6288 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6289 0, 0
6290 },
6291 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6292 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6293 0, 0
6294 },
6295 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6296 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6297 0, 0
6298 },
6299 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6300 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6301 0, 0
6302 },
6303 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6304 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6305 0, 0
6306 },
6307 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6308 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6309 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6310 },
6311 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6312 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6313 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6314 },
6315 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6316 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6317 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6318 },
6319 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6320 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6321 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6322 },
6323 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6324 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6325 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6326 },
6327 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6328 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6329 0, 0
6330 },
6331 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6332 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6333 0, 0
6334 },
6335 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6336 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6337 0, 0
6338 },
6339 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6340 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6341 0, 0
6342 },
6343 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6344 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6345 0, 0
6346 },
6347 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6348 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6349 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6350 },
6351 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6352 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6353 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6354 },
6355 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6356 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6357 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6358 },
6359 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6360 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6361 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6362 },
6363 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6364 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6365 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6366 },
6367 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6368 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6369 0, 0
6370 },
6371 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6372 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6373 0, 0
6374 },
6375 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6376 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6377 0, 0
6378 },
6379 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6380 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6381 0, 0
6382 },
6383 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6384 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6385 0, 0
6386 },
6387 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6388 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6389 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6390 },
6391 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6392 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6393 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6394 },
6395 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6396 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6397 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6398 },
6399 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6400 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6401 0, 0
6402 },
6403 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6404 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6405 0, 0
6406 },
6407 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6408 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6409 0, 0
6410 },
6411 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6412 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6413 0, 0
6414 },
6415 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6416 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6417 0, 0
6418 },
6419 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6420 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6421 0, 0
6422 }
6423 };
6424
6425 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6426 void *inject_if, uint32_t instance_mask)
6427 {
6428 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6429 int ret;
6430 struct ta_ras_trigger_error_input block_info = { 0 };
6431
6432 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6433 return -EINVAL;
6434
6435 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6436 return -EINVAL;
6437
6438 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6439 return -EPERM;
6440
6441 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6442 info->head.type)) {
6443 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6444 ras_gfx_subblocks[info->head.sub_block_index].name,
6445 info->head.type);
6446 return -EPERM;
6447 }
6448
6449 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6450 info->head.type)) {
6451 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6452 ras_gfx_subblocks[info->head.sub_block_index].name,
6453 info->head.type);
6454 return -EPERM;
6455 }
6456
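	/* Translate the RAS request into the TA trigger-error input and hand it
	 * to the PSP; grbm_idx_mutex serializes this with other GRBM index users.
	 */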
6457 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6458 block_info.sub_block_index =
6459 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6460 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6461 block_info.address = info->address;
6462 block_info.value = info->value;
6463
6464 mutex_lock(&adev->grbm_idx_mutex);
6465 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6466 mutex_unlock(&adev->grbm_idx_mutex);
6467
6468 return ret;
6469 }
6470
6471 static const char *vml2_mems[] = {
6472 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6473 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6474 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6475 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6476 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6477 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6478 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6479 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6480 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6481 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6482 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6483 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6484 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6485 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6486 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6487 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6488 };
6489
6490 static const char *vml2_walker_mems[] = {
6491 "UTC_VML2_CACHE_PDE0_MEM0",
6492 "UTC_VML2_CACHE_PDE0_MEM1",
6493 "UTC_VML2_CACHE_PDE1_MEM0",
6494 "UTC_VML2_CACHE_PDE1_MEM1",
6495 "UTC_VML2_CACHE_PDE2_MEM0",
6496 "UTC_VML2_CACHE_PDE2_MEM1",
6497 "UTC_VML2_RDIF_LOG_FIFO",
6498 };
6499
6500 static const char *atc_l2_cache_2m_mems[] = {
6501 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6502 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6503 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6504 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6505 };
6506
6507 static const char *atc_l2_cache_4k_mems[] = {
6508 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6509 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6510 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6511 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6512 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6513 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6514 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6515 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6516 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6517 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6518 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6519 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6520 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6521 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6522 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6523 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6524 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6525 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6526 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6527 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6528 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6529 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6530 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6531 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6532 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6533 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6534 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6535 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6536 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6537 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6538 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6539 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6540 };
6541
6542 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6543 struct ras_err_data *err_data)
6544 {
6545 uint32_t i, data;
6546 uint32_t sec_count, ded_count;
6547
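	/* Reset the index selectors and clear the EDC counters before walking
	 * each memory instance below.
	 */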
6548 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6549 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6550 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6551 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6552 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6553 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6554 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6555 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6556
6557 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6558 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6559 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6560
6561 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6562 if (sec_count) {
6563 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6564 "SEC %d\n", i, vml2_mems[i], sec_count);
6565 err_data->ce_count += sec_count;
6566 }
6567
6568 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6569 if (ded_count) {
6570 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6571 "DED %d\n", i, vml2_mems[i], ded_count);
6572 err_data->ue_count += ded_count;
6573 }
6574 }
6575
6576 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6577 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6578 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6579
6580 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6581 SEC_COUNT);
6582 if (sec_count) {
6583 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6584 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6585 err_data->ce_count += sec_count;
6586 }
6587
6588 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6589 DED_COUNT);
6590 if (ded_count) {
6591 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6592 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6593 err_data->ue_count += ded_count;
6594 }
6595 }
6596
6597 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6598 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6599 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6600
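			/* No field macro is used here; bits [14:13] are assumed to be
			 * the SEC count of ATC_L2_CACHE_2M_EDC_CNT.
			 */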
6601 sec_count = (data & 0x00006000L) >> 0xd;
6602 if (sec_count) {
6603 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6604 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6605 sec_count);
6606 err_data->ce_count += sec_count;
6607 }
6608 }
6609
6610 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6611 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6612 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6613
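			/* Hard-coded masks: bits [14:13] are assumed to be the SEC count
			 * and bits [16:15] the DED count of ATC_L2_CACHE_4K_EDC_CNT.
			 */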
6614 sec_count = (data & 0x00006000L) >> 0xd;
6615 if (sec_count) {
6616 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6618 sec_count);
6619 err_data->ce_count += sec_count;
6620 }
6621
6622 ded_count = (data & 0x00018000L) >> 0xf;
6623 if (ded_count) {
6624 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6625 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6626 ded_count);
6627 err_data->ue_count += ded_count;
6628 }
6629 }
6630
6631 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6632 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6633 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6634 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6635
6636 return 0;
6637 }
6638
6639 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6640 const struct soc15_reg_entry *reg,
6641 uint32_t se_id, uint32_t inst_id, uint32_t value,
6642 uint32_t *sec_count, uint32_t *ded_count)
6643 {
6644 uint32_t i;
6645 uint32_t sec_cnt, ded_cnt;
6646
6647 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6648 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6649 gfx_v9_0_ras_fields[i].seg != reg->seg ||
6650 gfx_v9_0_ras_fields[i].inst != reg->inst)
6651 continue;
6652
6653 sec_cnt = (value &
6654 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6655 gfx_v9_0_ras_fields[i].sec_count_shift;
6656 if (sec_cnt) {
6657 dev_info(adev->dev, "GFX SubBlock %s, "
6658 "Instance[%d][%d], SEC %d\n",
6659 gfx_v9_0_ras_fields[i].name,
6660 se_id, inst_id,
6661 sec_cnt);
6662 *sec_count += sec_cnt;
6663 }
6664
6665 ded_cnt = (value &
6666 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6667 gfx_v9_0_ras_fields[i].ded_count_shift;
6668 if (ded_cnt) {
6669 dev_info(adev->dev, "GFX SubBlock %s, "
6670 "Instance[%d][%d], DED %d\n",
6671 gfx_v9_0_ras_fields[i].name,
6672 se_id, inst_id,
6673 ded_cnt);
6674 *ded_count += ded_cnt;
6675 }
6676 }
6677
6678 return 0;
6679 }
6680
6681 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6682 {
6683 int i, j, k;
6684
6685 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6686 return;
6687
6688 /* read back registers to clear the counters */
6689 mutex_lock(&adev->grbm_idx_mutex);
6690 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6691 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6692 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6693 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6694 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6695 }
6696 }
6697 }
6698 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6699 mutex_unlock(&adev->grbm_idx_mutex);
6700
6701 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6702 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6703 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6704 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6705 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6706 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6707 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6708 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6709
6710 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6711 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6712 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6713 }
6714
6715 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6716 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6717 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6718 }
6719
6720 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6721 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6722 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6723 }
6724
6725 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6726 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6727 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6728 }
6729
6730 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6731 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6732 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6733 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6734 }
6735
6736 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6737 void *ras_error_status)
6738 {
6739 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6740 uint32_t sec_count = 0, ded_count = 0;
6741 uint32_t i, j, k;
6742 uint32_t reg_value;
6743
6744 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6745 return;
6746
6747 err_data->ue_count = 0;
6748 err_data->ce_count = 0;
6749
6750 mutex_lock(&adev->grbm_idx_mutex);
6751
6752 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6753 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6754 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6755 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6756 reg_value =
6757 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6758 if (reg_value)
6759 gfx_v9_0_ras_error_count(adev,
6760 &gfx_v9_0_edc_counter_regs[i],
6761 j, k, reg_value,
6762 &sec_count, &ded_count);
6763 }
6764 }
6765 }
6766
6767 err_data->ce_count += sec_count;
6768 err_data->ue_count += ded_count;
6769
6770 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6771 mutex_unlock(&adev->grbm_idx_mutex);
6772
6773 gfx_v9_0_query_utc_edc_status(adev, err_data);
6774 }
6775
6776 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6777 {
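	/* Invalidate the SH instruction/constant caches, TCL1 and TC, and write
	 * back the TC, so that prior writes are visible to subsequent work.
	 */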
6778 const unsigned int cp_coher_cntl =
6779 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6780 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6781 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6782 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6783 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6784
6785 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6786 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6787 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6788 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6789 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
6790 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6791 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6792 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6793 }
6794
6795 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6796 uint32_t pipe, bool enable)
6797 {
6798 struct amdgpu_device *adev = ring->adev;
6799 uint32_t val;
6800 uint32_t wcl_cs_reg;
6801
6802 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
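	/* enable throttles the pipe down to the lowest non-zero wave-limit
	 * setting (0x1); disable restores the hardware default for that pipe.
	 */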
6803 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6804
6805 switch (pipe) {
6806 case 0:
6807 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6808 break;
6809 case 1:
6810 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6811 break;
6812 case 2:
6813 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6814 break;
6815 case 3:
6816 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6817 break;
6818 default:
6819 DRM_DEBUG("invalid pipe %d\n", pipe);
6820 return;
6821 }
6822
6823 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6824
6825 }
6826 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6827 {
6828 struct amdgpu_device *adev = ring->adev;
6829 uint32_t val;
6830 int i;
6831
6832
6833 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
6834  * the number of gfx waves. Setting only the low 5 bits (0x1f out of 0x7f)
6835  * makes sure gfx gets only around 25% of GPU resources.
6836  */
6837 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6838 amdgpu_ring_emit_wreg(ring,
6839 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6840 val);
6841
6842 /* Restrict waves for normal/low priority compute queues as well
6843 * to get best QoS for high priority compute jobs.
6844 *
6845 * amdgpu controls only the 1st ME (CS pipes 0-3).
6846 */
6847 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6848 if (i != ring->pipe)
6849 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6850
6851 }
6852 }
6853
6854 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6855 .name = "gfx_v9_0",
6856 .early_init = gfx_v9_0_early_init,
6857 .late_init = gfx_v9_0_late_init,
6858 .sw_init = gfx_v9_0_sw_init,
6859 .sw_fini = gfx_v9_0_sw_fini,
6860 .hw_init = gfx_v9_0_hw_init,
6861 .hw_fini = gfx_v9_0_hw_fini,
6862 .suspend = gfx_v9_0_suspend,
6863 .resume = gfx_v9_0_resume,
6864 .is_idle = gfx_v9_0_is_idle,
6865 .wait_for_idle = gfx_v9_0_wait_for_idle,
6866 .soft_reset = gfx_v9_0_soft_reset,
6867 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6868 .set_powergating_state = gfx_v9_0_set_powergating_state,
6869 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6870 };
6871
6872 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6873 .type = AMDGPU_RING_TYPE_GFX,
6874 .align_mask = 0xff,
6875 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6876 .support_64bit_ptrs = true,
6877 .secure_submission_supported = true,
6878 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6879 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6880 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6881 .emit_frame_size = /* totally 242 maximum if 16 IBs */
6882 5 + /* COND_EXEC */
6883 7 + /* PIPELINE_SYNC */
6884 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6885 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6886 2 + /* VM_FLUSH */
6887 8 + /* FENCE for VM_FLUSH */
6888 20 + /* GDS switch */
6889 4 + /* double SWITCH_BUFFER,
6890 the first COND_EXEC jump to the place just
6891 prior to this double SWITCH_BUFFER */
6892 5 + /* COND_EXEC */
6893 7 + /* HDP_flush */
6894 4 + /* VGT_flush */
6895 14 + /* CE_META */
6896 31 + /* DE_META */
6897 3 + /* CNTX_CTRL */
6898 5 + /* HDP_INVL */
6899 8 + 8 + /* FENCE x2 */
6900 2 + /* SWITCH_BUFFER */
6901 7, /* gfx_v9_0_emit_mem_sync */
6902 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6903 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6904 .emit_fence = gfx_v9_0_ring_emit_fence,
6905 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6906 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6907 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6908 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6909 .test_ring = gfx_v9_0_ring_test_ring,
6910 .insert_nop = amdgpu_ring_insert_nop,
6911 .pad_ib = amdgpu_ring_generic_pad_ib,
6912 .emit_switch_buffer = gfx_v9_ring_emit_sb,
6913 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6914 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6915 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6916 .preempt_ib = gfx_v9_0_ring_preempt_ib,
6917 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6918 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6919 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6920 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6921 .soft_recovery = gfx_v9_0_ring_soft_recovery,
6922 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6923 };
6924
6925 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6926 .type = AMDGPU_RING_TYPE_GFX,
6927 .align_mask = 0xff,
6928 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6929 .support_64bit_ptrs = true,
6930 .secure_submission_supported = true,
6931 .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6932 .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6933 .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6934 .emit_frame_size = /* totally 242 maximum if 16 IBs */
6935 5 + /* COND_EXEC */
6936 7 + /* PIPELINE_SYNC */
6937 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6938 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6939 2 + /* VM_FLUSH */
6940 8 + /* FENCE for VM_FLUSH */
6941 20 + /* GDS switch */
6942 4 + /* double SWITCH_BUFFER,
6943 * the first COND_EXEC jump to the place just
6944 * prior to this double SWITCH_BUFFER
6945 */
6946 5 + /* COND_EXEC */
6947 7 + /* HDP_flush */
6948 4 + /* VGT_flush */
6949 14 + /* CE_META */
6950 31 + /* DE_META */
6951 3 + /* CNTX_CTRL */
6952 5 + /* HDP_INVL */
6953 8 + 8 + /* FENCE x2 */
6954 2 + /* SWITCH_BUFFER */
6955 7, /* gfx_v9_0_emit_mem_sync */
6956 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6957 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6958 .emit_fence = gfx_v9_0_ring_emit_fence,
6959 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6960 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6961 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6962 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6963 .test_ring = gfx_v9_0_ring_test_ring,
6964 .test_ib = gfx_v9_0_ring_test_ib,
6965 .insert_nop = amdgpu_sw_ring_insert_nop,
6966 .pad_ib = amdgpu_ring_generic_pad_ib,
6967 .emit_switch_buffer = gfx_v9_ring_emit_sb,
6968 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6969 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6970 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6971 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6972 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6973 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6974 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6975 .soft_recovery = gfx_v9_0_ring_soft_recovery,
6976 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6977 .patch_cntl = gfx_v9_0_ring_patch_cntl,
6978 .patch_de = gfx_v9_0_ring_patch_de_meta,
6979 .patch_ce = gfx_v9_0_ring_patch_ce_meta,
6980 };
6981
6982 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6983 .type = AMDGPU_RING_TYPE_COMPUTE,
6984 .align_mask = 0xff,
6985 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6986 .support_64bit_ptrs = true,
6987 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6988 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6989 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6990 .emit_frame_size =
6991 20 + /* gfx_v9_0_ring_emit_gds_switch */
6992 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6993 5 + /* hdp invalidate */
6994 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6995 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6996 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6997 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6998 7 + /* gfx_v9_0_emit_mem_sync */
6999 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7000 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7001 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7002 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7003 .emit_fence = gfx_v9_0_ring_emit_fence,
7004 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7005 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7006 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7007 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7008 .test_ring = gfx_v9_0_ring_test_ring,
7009 .test_ib = gfx_v9_0_ring_test_ib,
7010 .insert_nop = amdgpu_ring_insert_nop,
7011 .pad_ib = amdgpu_ring_generic_pad_ib,
7012 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7013 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7014 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7015 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7016 .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7017 };
7018
7019 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7020 .type = AMDGPU_RING_TYPE_KIQ,
7021 .align_mask = 0xff,
7022 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7023 .support_64bit_ptrs = true,
7024 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7025 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7026 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7027 .emit_frame_size =
7028 20 + /* gfx_v9_0_ring_emit_gds_switch */
7029 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7030 5 + /* hdp invalidate */
7031 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7032 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7033 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7034 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7035 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7036 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7037 .test_ring = gfx_v9_0_ring_test_ring,
7038 .insert_nop = amdgpu_ring_insert_nop,
7039 .pad_ib = amdgpu_ring_generic_pad_ib,
7040 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7041 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7042 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7043 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7044 };
7045
7046 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7047 {
7048 int i;
7049
7050 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7051
7052 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7053 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7054
7055 if (adev->gfx.num_gfx_rings) {
7056 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7057 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7058 }
7059
7060 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7061 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7062 }
7063
7064 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7065 .set = gfx_v9_0_set_eop_interrupt_state,
7066 .process = gfx_v9_0_eop_irq,
7067 };
7068
7069 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7070 .set = gfx_v9_0_set_priv_reg_fault_state,
7071 .process = gfx_v9_0_priv_reg_irq,
7072 };
7073
7074 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7075 .set = gfx_v9_0_set_priv_inst_fault_state,
7076 .process = gfx_v9_0_priv_inst_irq,
7077 };
7078
7079 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7080 .set = gfx_v9_0_set_cp_ecc_error_state,
7081 .process = amdgpu_gfx_cp_ecc_error_irq,
7082 };
7083
7084
7085 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7086 {
7087 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7088 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7089
7090 adev->gfx.priv_reg_irq.num_types = 1;
7091 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7092
7093 adev->gfx.priv_inst_irq.num_types = 1;
7094 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7095
7096 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7097 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7098 }
7099
7100 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7101 {
7102 switch (adev->ip_versions[GC_HWIP][0]) {
7103 case IP_VERSION(9, 0, 1):
7104 case IP_VERSION(9, 2, 1):
7105 case IP_VERSION(9, 4, 0):
7106 case IP_VERSION(9, 2, 2):
7107 case IP_VERSION(9, 1, 0):
7108 case IP_VERSION(9, 4, 1):
7109 case IP_VERSION(9, 3, 0):
7110 case IP_VERSION(9, 4, 2):
7111 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7112 break;
7113 default:
7114 break;
7115 }
7116 }
7117
7118 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7119 {
7120 /* init asic gds info */
7121 switch (adev->ip_versions[GC_HWIP][0]) {
7122 case IP_VERSION(9, 0, 1):
7123 case IP_VERSION(9, 2, 1):
7124 case IP_VERSION(9, 4, 0):
7125 adev->gds.gds_size = 0x10000;
7126 break;
7127 case IP_VERSION(9, 2, 2):
7128 case IP_VERSION(9, 1, 0):
7129 case IP_VERSION(9, 4, 1):
7130 adev->gds.gds_size = 0x1000;
7131 break;
7132 case IP_VERSION(9, 4, 2):
7133 /* Aldebaran removed all of the GDS internal memory;
7134  * the kernel only supports GWS opcodes such as barrier
7135  * and semaphore. */
7136 adev->gds.gds_size = 0;
7137 break;
7138 default:
7139 adev->gds.gds_size = 0x10000;
7140 break;
7141 }
7142
7143 switch (adev->ip_versions[GC_HWIP][0]) {
7144 case IP_VERSION(9, 0, 1):
7145 case IP_VERSION(9, 4, 0):
7146 adev->gds.gds_compute_max_wave_id = 0x7ff;
7147 break;
7148 case IP_VERSION(9, 2, 1):
7149 adev->gds.gds_compute_max_wave_id = 0x27f;
7150 break;
7151 case IP_VERSION(9, 2, 2):
7152 case IP_VERSION(9, 1, 0):
7153 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7154 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7155 else
7156 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7157 break;
7158 case IP_VERSION(9, 4, 1):
7159 adev->gds.gds_compute_max_wave_id = 0xfff;
7160 break;
7161 case IP_VERSION(9, 4, 2):
7162 /* deprecated for Aldebaran, no usage at all */
7163 adev->gds.gds_compute_max_wave_id = 0;
7164 break;
7165 default:
7166 /* this really depends on the chip */
7167 adev->gds.gds_compute_max_wave_id = 0x7ff;
7168 break;
7169 }
7170
7171 adev->gds.gws_size = 64;
7172 adev->gds.oa_size = 16;
7173 }
7174
7175 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7176 u32 bitmap)
7177 {
7178 u32 data;
7179
7180 if (!bitmap)
7181 return;
7182
7183 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7184 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7185
7186 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7187 }
7188
7189 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7190 {
7191 u32 data, mask;
7192
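	/* Combine the fused-off CUs (CC_GC_SHADER_ARRAY_CONFIG) with the
	 * user-disabled CUs (GC_USER_SHADER_ARRAY_CONFIG), then invert and mask
	 * to get the CUs that are active in the currently selected SE/SH.
	 */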
7193 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7194 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7195
7196 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7197 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7198
7199 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7200
7201 return (~data) & mask;
7202 }
7203
7204 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7205 struct amdgpu_cu_info *cu_info)
7206 {
7207 int i, j, k, counter, active_cu_number = 0;
7208 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7209 unsigned disable_masks[4 * 4];
7210
7211 if (!adev || !cu_info)
7212 return -EINVAL;
7213
7214 /*
7215 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7216 */
7217 if (adev->gfx.config.max_shader_engines *
7218 adev->gfx.config.max_sh_per_se > 16)
7219 return -EINVAL;
7220
7221 amdgpu_gfx_parse_disable_cu(disable_masks,
7222 adev->gfx.config.max_shader_engines,
7223 adev->gfx.config.max_sh_per_se);
7224
7225 mutex_lock(&adev->grbm_idx_mutex);
7226 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7227 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7228 mask = 1;
7229 ao_bitmap = 0;
7230 counter = 0;
7231 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7232 gfx_v9_0_set_user_cu_inactive_bitmap(
7233 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7234 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7235
7236 /*
7237  * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7238  * a 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
7239  * layout.
7240  * Arcturus, however, changed the SE/SH layout to 8*1.
7241  * To minimize the impact, we keep it compatible with the
7242  * current bitmap array as below:
7243 * SE4,SH0 --> bitmap[0][1]
7244 * SE5,SH0 --> bitmap[1][1]
7245 * SE6,SH0 --> bitmap[2][1]
7246 * SE7,SH0 --> bitmap[3][1]
7247 */
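			/* The index arithmetic below (row = i % 4, column = j + i / 4)
			 * implements the mapping described above.
			 */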
7248 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7249
7250 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7251 if (bitmap & mask) {
7252 if (counter < adev->gfx.config.max_cu_per_sh)
7253 ao_bitmap |= mask;
7254 counter ++;
7255 }
7256 mask <<= 1;
7257 }
7258 active_cu_number += counter;
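			/* The 32-bit ao_cu_mask packs 8 bits per SH and presumably only
			 * has room for the first 2 SEs x 2 SHs, hence the guard below.
			 */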
7259 if (i < 2 && j < 2)
7260 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7261 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7262 }
7263 }
7264 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7265 mutex_unlock(&adev->grbm_idx_mutex);
7266
7267 cu_info->number = active_cu_number;
7268 cu_info->ao_cu_mask = ao_cu_mask;
7269 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7270
7271 return 0;
7272 }
7273
7274 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7275 {
7276 .type = AMD_IP_BLOCK_TYPE_GFX,
7277 .major = 9,
7278 .minor = 0,
7279 .rev = 0,
7280 .funcs = &gfx_v9_0_ip_funcs,
7281 };
7282