xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision abade675e02e1b73da0c20ffaf08fbe309038298)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "soc15.h"
29 #include "soc15d.h"
30 #include "amdgpu_atomfirmware.h"
31 #include "amdgpu_pm.h"
32 
33 #include "gc/gc_9_0_offset.h"
34 #include "gc/gc_9_0_sh_mask.h"
35 #include "vega10_enum.h"
36 #include "hdp/hdp_4_0_offset.h"
37 
38 #include "soc15.h"
39 #include "soc15_common.h"
40 #include "clearstate_gfx9.h"
41 #include "v9_structs.h"
42 
43 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
44 
45 #include "amdgpu_ras.h"
46 
47 #define GFX9_NUM_GFX_RINGS     1
48 #define GFX9_MEC_HPD_SIZE 4096
49 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
50 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
51 
52 #define mmPWR_MISC_CNTL_STATUS					0x0183
53 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
54 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
55 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
56 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
57 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
58 
59 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
60 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
61 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
62 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
63 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
65 
66 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
81 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/raven_me.bin");
83 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
84 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
88 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
90 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
94 
95 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
96 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
98 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
99 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
101 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
102 
103 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
104 {
105 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
106 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
107 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
108 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
109 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
110 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
111 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
112 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
113 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
114 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
115 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
116 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
117 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
118 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
119 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
120 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
121 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
122 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
123 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
124 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
125 };
126 
127 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
128 {
129 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
130 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
131 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
132 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
133 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
134 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
135 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
136 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
137 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
138 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
139 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
140 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
141 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
142 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
143 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
144 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
145 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
146 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
147 };
148 
149 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
150 {
151 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
152 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
153 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
154 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
155 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
156 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
157 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
158 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
159 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
160 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
161 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
162 };
163 
164 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
165 {
166 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
167 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
168 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
169 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
170 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
171 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
172 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
173 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
174 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
175 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
176 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
177 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
178 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
179 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
180 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
181 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
182 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
183 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
184 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
185 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
186 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
187 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
188 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
189 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
190 };
191 
192 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
193 {
194 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
195 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
196 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
197 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
198 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
199 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
200 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
201 };
202 
203 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
204 {
205 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
206 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
207 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
208 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
209 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
210 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
211 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
212 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
213 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
214 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
215 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
216 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
217 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
218 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
219 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
220 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
221 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
222 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
223 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
224 };
225 
226 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
227 {
228 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
229 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
230 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
231 };
232 
233 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
234 {
235 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
236 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
237 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
238 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
239 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
240 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
241 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
243 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
244 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
245 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
246 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
248 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
251 };
252 
253 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
254 {
255 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
256 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
257 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
258 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
263 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
264 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
265 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
266 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
267 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
268 };
269 
270 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
271 {
272 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
273 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
274 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
275 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
276 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 };
281 
282 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
283 {
284 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
285 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
286 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
287 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
288 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 };
293 
294 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
295 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
296 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
297 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
298 
299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
300 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
301 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
302 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
303 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
304                                  struct amdgpu_cu_info *cu_info);
305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
306 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
307 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
308 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
309 
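/*
 * Program the per-ASIC golden register settings, followed by the
 * settings common to all GC 9.x parts.
 */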
310 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
311 {
312 	switch (adev->asic_type) {
313 	case CHIP_VEGA10:
314 		if (!amdgpu_virt_support_skip_setting(adev)) {
315 			soc15_program_register_sequence(adev,
316 							 golden_settings_gc_9_0,
317 							 ARRAY_SIZE(golden_settings_gc_9_0));
318 			soc15_program_register_sequence(adev,
319 							 golden_settings_gc_9_0_vg10,
320 							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
321 		}
322 		break;
323 	case CHIP_VEGA12:
324 		soc15_program_register_sequence(adev,
325 						golden_settings_gc_9_2_1,
326 						ARRAY_SIZE(golden_settings_gc_9_2_1));
327 		soc15_program_register_sequence(adev,
328 						golden_settings_gc_9_2_1_vg12,
329 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
330 		break;
331 	case CHIP_VEGA20:
332 		soc15_program_register_sequence(adev,
333 						golden_settings_gc_9_0,
334 						ARRAY_SIZE(golden_settings_gc_9_0));
335 		soc15_program_register_sequence(adev,
336 						golden_settings_gc_9_0_vg20,
337 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
338 		break;
339 	case CHIP_RAVEN:
340 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
341 						ARRAY_SIZE(golden_settings_gc_9_1));
342 		if (adev->rev_id >= 8)
343 			soc15_program_register_sequence(adev,
344 							golden_settings_gc_9_1_rv2,
345 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
346 		else
347 			soc15_program_register_sequence(adev,
348 							golden_settings_gc_9_1_rv1,
349 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
350 		break;
351 	default:
352 		break;
353 	}
354 
355 	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
356 					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
357 }
358 
359 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
360 {
361 	adev->gfx.scratch.num_reg = 8;
362 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
363 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
364 }
365 
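/*
 * Emit a WRITE_DATA packet that writes @val to register @reg,
 * optionally requesting write confirmation.
 */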
366 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
367 				       bool wc, uint32_t reg, uint32_t val)
368 {
369 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
370 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
371 				WRITE_DATA_DST_SEL(0) |
372 				(wc ? WR_CONFIRM : 0));
373 	amdgpu_ring_write(ring, reg);
374 	amdgpu_ring_write(ring, 0);
375 	amdgpu_ring_write(ring, val);
376 }
377 
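/*
 * Emit a WAIT_REG_MEM packet: poll a register or memory location until
 * (value & @mask) == @ref, on the selected engine, with @inv as the
 * poll interval.
 */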
378 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
379 				  int mem_space, int opt, uint32_t addr0,
380 				  uint32_t addr1, uint32_t ref, uint32_t mask,
381 				  uint32_t inv)
382 {
383 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
384 	amdgpu_ring_write(ring,
385 				 /* memory (1) or register (0) */
386 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
387 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
388 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
389 				 WAIT_REG_MEM_ENGINE(eng_sel)));
390 
391 	if (mem_space)
392 		BUG_ON(addr0 & 0x3); /* Dword align */
393 	amdgpu_ring_write(ring, addr0);
394 	amdgpu_ring_write(ring, addr1);
395 	amdgpu_ring_write(ring, ref);
396 	amdgpu_ring_write(ring, mask);
397 	amdgpu_ring_write(ring, inv); /* poll interval */
398 }
399 
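/*
 * Basic ring test: write a known value to a scratch register through
 * the ring and poll until it reads back or the timeout expires.
 */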
400 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
401 {
402 	struct amdgpu_device *adev = ring->adev;
403 	uint32_t scratch;
404 	uint32_t tmp = 0;
405 	unsigned i;
406 	int r;
407 
408 	r = amdgpu_gfx_scratch_get(adev, &scratch);
409 	if (r)
410 		return r;
411 
412 	WREG32(scratch, 0xCAFEDEAD);
413 	r = amdgpu_ring_alloc(ring, 3);
414 	if (r)
415 		goto error_free_scratch;
416 
417 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
418 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
419 	amdgpu_ring_write(ring, 0xDEADBEEF);
420 	amdgpu_ring_commit(ring);
421 
422 	for (i = 0; i < adev->usec_timeout; i++) {
423 		tmp = RREG32(scratch);
424 		if (tmp == 0xDEADBEEF)
425 			break;
426 		DRM_UDELAY(1);
427 	}
428 
429 	if (i >= adev->usec_timeout)
430 		r = -ETIMEDOUT;
431 
432 error_free_scratch:
433 	amdgpu_gfx_scratch_free(adev, scratch);
434 	return r;
435 }
436 
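/*
 * IB test: submit a small indirect buffer that writes a known value to
 * a writeback slot and verify it lands once the fence signals.
 */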
437 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
438 {
439 	struct amdgpu_device *adev = ring->adev;
440 	struct amdgpu_ib ib;
441 	struct dma_fence *f = NULL;
442 
443 	unsigned index;
444 	uint64_t gpu_addr;
445 	uint32_t tmp;
446 	long r;
447 
448 	r = amdgpu_device_wb_get(adev, &index);
449 	if (r)
450 		return r;
451 
452 	gpu_addr = adev->wb.gpu_addr + (index * 4);
453 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
454 	memset(&ib, 0, sizeof(ib));
455 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
456 	if (r)
457 		goto err1;
458 
459 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
460 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
461 	ib.ptr[2] = lower_32_bits(gpu_addr);
462 	ib.ptr[3] = upper_32_bits(gpu_addr);
463 	ib.ptr[4] = 0xDEADBEEF;
464 	ib.length_dw = 5;
465 
466 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
467 	if (r)
468 		goto err2;
469 
470 	r = dma_fence_wait_timeout(f, false, timeout);
471 	if (r == 0) {
472 		r = -ETIMEDOUT;
473 		goto err2;
474 	} else if (r < 0) {
475 		goto err2;
476 	}
477 
478 	tmp = adev->wb.wb[index];
479 	if (tmp == 0xDEADBEEF)
480 		r = 0;
481 	else
482 		r = -EINVAL;
483 
484 err2:
485 	amdgpu_ib_free(adev, &ib, NULL);
486 	dma_fence_put(f);
487 err1:
488 	amdgpu_device_wb_free(adev, index);
489 	return r;
490 }
491 
492 
493 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
494 {
495 	release_firmware(adev->gfx.pfp_fw);
496 	adev->gfx.pfp_fw = NULL;
497 	release_firmware(adev->gfx.me_fw);
498 	adev->gfx.me_fw = NULL;
499 	release_firmware(adev->gfx.ce_fw);
500 	adev->gfx.ce_fw = NULL;
501 	release_firmware(adev->gfx.rlc_fw);
502 	adev->gfx.rlc_fw = NULL;
503 	release_firmware(adev->gfx.mec_fw);
504 	adev->gfx.mec_fw = NULL;
505 	release_firmware(adev->gfx.mec2_fw);
506 	adev->gfx.mec2_fw = NULL;
507 
508 	kfree(adev->gfx.rlc.register_list_format);
509 }
510 
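/*
 * Parse the RLC v2.1 firmware header for the save/restore list
 * (CNTL/GPM/SRM) sub-ucodes and their versions.
 */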
511 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
512 {
513 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
514 
515 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
516 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
517 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
518 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
519 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
520 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
521 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
522 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
523 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
524 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
525 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
526 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
527 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
528 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
529 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
530 }
531 
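/*
 * Record whether the loaded ME/PFP/MEC firmware is recent enough to
 * support combined register write-then-wait packets.
 */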
532 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
533 {
534 	adev->gfx.me_fw_write_wait = false;
535 	adev->gfx.mec_fw_write_wait = false;
536 
537 	switch (adev->asic_type) {
538 	case CHIP_VEGA10:
539 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
540 		    (adev->gfx.me_feature_version >= 42) &&
541 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
542 		    (adev->gfx.pfp_feature_version >= 42))
543 			adev->gfx.me_fw_write_wait = true;
544 
545 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
546 		    (adev->gfx.mec_feature_version >= 42))
547 			adev->gfx.mec_fw_write_wait = true;
548 		break;
549 	case CHIP_VEGA12:
550 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
551 		    (adev->gfx.me_feature_version >= 44) &&
552 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
553 		    (adev->gfx.pfp_feature_version >= 44))
554 			adev->gfx.me_fw_write_wait = true;
555 
556 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
557 		    (adev->gfx.mec_feature_version >= 44))
558 			adev->gfx.mec_fw_write_wait = true;
559 		break;
560 	case CHIP_VEGA20:
561 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
562 		    (adev->gfx.me_feature_version >= 44) &&
563 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
564 		    (adev->gfx.pfp_feature_version >= 44))
565 			adev->gfx.me_fw_write_wait = true;
566 
567 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
568 		    (adev->gfx.mec_feature_version >= 44))
569 			adev->gfx.mec_fw_write_wait = true;
570 		break;
571 	case CHIP_RAVEN:
572 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
573 		    (adev->gfx.me_feature_version >= 42) &&
574 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
575 		    (adev->gfx.pfp_feature_version >= 42))
576 			adev->gfx.me_fw_write_wait = true;
577 
578 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
579 		    (adev->gfx.mec_feature_version >= 42))
580 			adev->gfx.mec_fw_write_wait = true;
581 		break;
582 	default:
583 		break;
584 	}
585 }
586 
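/*
 * Disable GFXOFF on Raven parts whose RLC firmware is too old to
 * support it reliably.
 */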
587 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
588 {
589 	switch (adev->asic_type) {
590 	case CHIP_VEGA10:
591 	case CHIP_VEGA12:
592 	case CHIP_VEGA20:
593 		break;
594 	case CHIP_RAVEN:
595 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
596 			break;
597 		if ((adev->gfx.rlc_fw_version != 106 &&
598 		     adev->gfx.rlc_fw_version < 531) ||
599 		    (adev->gfx.rlc_fw_version == 53815) ||
600 		    (adev->gfx.rlc_feature_version < 1) ||
601 		    !adev->gfx.rlc.is_rlc_v2_1)
602 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
603 		break;
604 	default:
605 		break;
606 	}
607 }
608 
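/*
 * Request and validate the PFP, ME, CE, RLC, MEC and optional MEC2
 * firmware images for this ASIC, then populate the ucode table used
 * for PSP front-door loading.
 */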
609 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
610 {
611 	const char *chip_name;
612 	char fw_name[30];
613 	int err;
614 	struct amdgpu_firmware_info *info = NULL;
615 	const struct common_firmware_header *header = NULL;
616 	const struct gfx_firmware_header_v1_0 *cp_hdr;
617 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
618 	unsigned int *tmp = NULL;
619 	unsigned int i = 0;
620 	uint16_t version_major;
621 	uint16_t version_minor;
622 	uint32_t smu_version;
623 
624 	DRM_DEBUG("\n");
625 
626 	switch (adev->asic_type) {
627 	case CHIP_VEGA10:
628 		chip_name = "vega10";
629 		break;
630 	case CHIP_VEGA12:
631 		chip_name = "vega12";
632 		break;
633 	case CHIP_VEGA20:
634 		chip_name = "vega20";
635 		break;
636 	case CHIP_RAVEN:
637 		if (adev->rev_id >= 8)
638 			chip_name = "raven2";
639 		else if (adev->pdev->device == 0x15d8)
640 			chip_name = "picasso";
641 		else
642 			chip_name = "raven";
643 		break;
644 	default:
645 		BUG();
646 	}
647 
648 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
649 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
650 	if (err)
651 		goto out;
652 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
653 	if (err)
654 		goto out;
655 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
656 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
657 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
658 
659 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
660 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
661 	if (err)
662 		goto out;
663 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
664 	if (err)
665 		goto out;
666 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
667 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
668 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
669 
670 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
671 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
672 	if (err)
673 		goto out;
674 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
675 	if (err)
676 		goto out;
677 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
678 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
679 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
680 
681 	/*
682 	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
683 	 * instead of picasso_rlc.bin.
684 	 * How to tell them apart:
685 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
686 	 *          or revision >= 0xD8 && revision <= 0xDF
687 	 * otherwise it is PCO FP5
688 	 */
689 	if (!strcmp(chip_name, "picasso") &&
690 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
691 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
692 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
693 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
694 		(smu_version >= 0x41e2b))
695 		/*
696 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
697 		 */
698 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
699 	else
700 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
701 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
702 	if (err)
703 		goto out;
704 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
705 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
706 
707 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
708 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
709 	if (version_major == 2 && version_minor == 1)
710 		adev->gfx.rlc.is_rlc_v2_1 = true;
711 
712 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
713 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
714 	adev->gfx.rlc.save_and_restore_offset =
715 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
716 	adev->gfx.rlc.clear_state_descriptor_offset =
717 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
718 	adev->gfx.rlc.avail_scratch_ram_locations =
719 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
720 	adev->gfx.rlc.reg_restore_list_size =
721 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
722 	adev->gfx.rlc.reg_list_format_start =
723 			le32_to_cpu(rlc_hdr->reg_list_format_start);
724 	adev->gfx.rlc.reg_list_format_separate_start =
725 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
726 	adev->gfx.rlc.starting_offsets_start =
727 			le32_to_cpu(rlc_hdr->starting_offsets_start);
728 	adev->gfx.rlc.reg_list_format_size_bytes =
729 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
730 	adev->gfx.rlc.reg_list_size_bytes =
731 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
732 	adev->gfx.rlc.register_list_format =
733 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
734 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
735 	if (!adev->gfx.rlc.register_list_format) {
736 		err = -ENOMEM;
737 		goto out;
738 	}
739 
740 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
741 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
742 	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
743 		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
744 
745 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
746 
747 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
748 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
749 	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
750 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
751 
752 	if (adev->gfx.rlc.is_rlc_v2_1)
753 		gfx_v9_0_init_rlc_ext_microcode(adev);
754 
755 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
756 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
757 	if (err)
758 		goto out;
759 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
760 	if (err)
761 		goto out;
762 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
763 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
764 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
765 
766 
767 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
768 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
769 	if (!err) {
770 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
771 		if (err)
772 			goto out;
773 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
774 			adev->gfx.mec2_fw->data;
775 		adev->gfx.mec2_fw_version =
776 			le32_to_cpu(cp_hdr->header.ucode_version);
777 		adev->gfx.mec2_feature_version =
778 			le32_to_cpu(cp_hdr->ucode_feature_version);
779 	} else {
780 		err = 0;
781 		adev->gfx.mec2_fw = NULL;
782 	}
783 
784 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
785 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
786 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
787 		info->fw = adev->gfx.pfp_fw;
788 		header = (const struct common_firmware_header *)info->fw->data;
789 		adev->firmware.fw_size +=
790 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
791 
792 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
793 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
794 		info->fw = adev->gfx.me_fw;
795 		header = (const struct common_firmware_header *)info->fw->data;
796 		adev->firmware.fw_size +=
797 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
798 
799 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
800 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
801 		info->fw = adev->gfx.ce_fw;
802 		header = (const struct common_firmware_header *)info->fw->data;
803 		adev->firmware.fw_size +=
804 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
805 
806 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
807 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
808 		info->fw = adev->gfx.rlc_fw;
809 		header = (const struct common_firmware_header *)info->fw->data;
810 		adev->firmware.fw_size +=
811 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
812 
813 		if (adev->gfx.rlc.is_rlc_v2_1 &&
814 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
815 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
816 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
817 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
818 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
819 			info->fw = adev->gfx.rlc_fw;
820 			adev->firmware.fw_size +=
821 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
822 
823 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
824 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
825 			info->fw = adev->gfx.rlc_fw;
826 			adev->firmware.fw_size +=
827 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
828 
829 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
830 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
831 			info->fw = adev->gfx.rlc_fw;
832 			adev->firmware.fw_size +=
833 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
834 		}
835 
836 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
837 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
838 		info->fw = adev->gfx.mec_fw;
839 		header = (const struct common_firmware_header *)info->fw->data;
840 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
841 		adev->firmware.fw_size +=
842 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
843 
844 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
845 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
846 		info->fw = adev->gfx.mec_fw;
847 		adev->firmware.fw_size +=
848 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
849 
850 		if (adev->gfx.mec2_fw) {
851 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
852 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
853 			info->fw = adev->gfx.mec2_fw;
854 			header = (const struct common_firmware_header *)info->fw->data;
855 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
856 			adev->firmware.fw_size +=
857 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
858 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
859 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
860 			info->fw = adev->gfx.mec2_fw;
861 			adev->firmware.fw_size +=
862 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
863 		}
864 
865 	}
866 
867 out:
868 	gfx_v9_0_check_if_need_gfxoff(adev);
869 	gfx_v9_0_check_fw_write_wait(adev);
870 	if (err) {
871 		dev_err(adev->dev,
872 			"gfx9: Failed to load firmware \"%s\"\n",
873 			fw_name);
874 		release_firmware(adev->gfx.pfp_fw);
875 		adev->gfx.pfp_fw = NULL;
876 		release_firmware(adev->gfx.me_fw);
877 		adev->gfx.me_fw = NULL;
878 		release_firmware(adev->gfx.ce_fw);
879 		adev->gfx.ce_fw = NULL;
880 		release_firmware(adev->gfx.rlc_fw);
881 		adev->gfx.rlc_fw = NULL;
882 		release_firmware(adev->gfx.mec_fw);
883 		adev->gfx.mec_fw = NULL;
884 		release_firmware(adev->gfx.mec2_fw);
885 		adev->gfx.mec2_fw = NULL;
886 	}
887 	return err;
888 }
889 
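/* Return the size of the clear-state indirect buffer, in dwords. */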
890 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
891 {
892 	u32 count = 0;
893 	const struct cs_section_def *sect = NULL;
894 	const struct cs_extent_def *ext = NULL;
895 
896 	/* begin clear state */
897 	count += 2;
898 	/* context control state */
899 	count += 3;
900 
901 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
902 		for (ext = sect->section; ext->extent != NULL; ++ext) {
903 			if (sect->id == SECT_CONTEXT)
904 				count += 2 + ext->reg_count;
905 			else
906 				return 0;
907 		}
908 	}
909 
910 	/* end clear state */
911 	count += 2;
912 	/* clear state */
913 	count += 2;
914 
915 	return count;
916 }
917 
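/* Fill @buffer with the clear-state indirect buffer packets. */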
918 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
919 				    volatile u32 *buffer)
920 {
921 	u32 count = 0, i;
922 	const struct cs_section_def *sect = NULL;
923 	const struct cs_extent_def *ext = NULL;
924 
925 	if (adev->gfx.rlc.cs_data == NULL)
926 		return;
927 	if (buffer == NULL)
928 		return;
929 
930 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
931 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
932 
933 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
934 	buffer[count++] = cpu_to_le32(0x80000000);
935 	buffer[count++] = cpu_to_le32(0x80000000);
936 
937 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
938 		for (ext = sect->section; ext->extent != NULL; ++ext) {
939 			if (sect->id == SECT_CONTEXT) {
940 				buffer[count++] =
941 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
942 				buffer[count++] = cpu_to_le32(ext->reg_index -
943 						PACKET3_SET_CONTEXT_REG_START);
944 				for (i = 0; i < ext->reg_count; i++)
945 					buffer[count++] = cpu_to_le32(ext->extent[i]);
946 			} else {
947 				return;
948 			}
949 		}
950 	}
951 
952 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
953 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
954 
955 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
956 	buffer[count++] = cpu_to_le32(0);
957 }
958 
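/*
 * Program the always-on CU masks used by RLC load balancing and power
 * gating; the number of always-on CUs depends on the ASIC.
 */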
959 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
960 {
961 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
962 	uint32_t pg_always_on_cu_num = 2;
963 	uint32_t always_on_cu_num;
964 	uint32_t i, j, k;
965 	uint32_t mask, cu_bitmap, counter;
966 
967 	if (adev->flags & AMD_IS_APU)
968 		always_on_cu_num = 4;
969 	else if (adev->asic_type == CHIP_VEGA12)
970 		always_on_cu_num = 8;
971 	else
972 		always_on_cu_num = 12;
973 
974 	mutex_lock(&adev->grbm_idx_mutex);
975 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
976 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
977 			mask = 1;
978 			cu_bitmap = 0;
979 			counter = 0;
980 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
981 
982 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
983 				if (cu_info->bitmap[i][j] & mask) {
984 					if (counter == pg_always_on_cu_num)
985 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
986 					if (counter < always_on_cu_num)
987 						cu_bitmap |= mask;
988 					else
989 						break;
990 					counter++;
991 				}
992 				mask <<= 1;
993 			}
994 
995 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
996 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
997 		}
998 	}
999 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1000 	mutex_unlock(&adev->grbm_idx_mutex);
1001 }
1002 
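/* Configure load balancing per watt (LBPW) thresholds for Raven. */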
1003 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1004 {
1005 	uint32_t data;
1006 
1007 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1008 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1009 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1010 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1011 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1012 
1013 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1014 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1015 
1016 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1017 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1018 
1019 	mutex_lock(&adev->grbm_idx_mutex);
1020 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1021 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1022 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1023 
1024 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1025 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1026 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1027 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1028 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1029 
1030 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1031 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1032 	data &= 0x0000FFFF;
1033 	data |= 0x00C00000;
1034 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1035 
1036 	/*
1037 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1038 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1039 	 */
1040 
1041 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1042 	 * but is used here for RLC_LB_CNTL configuration */
1043 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1044 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1045 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1046 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1047 	mutex_unlock(&adev->grbm_idx_mutex);
1048 
1049 	gfx_v9_0_init_always_on_cu_mask(adev);
1050 }
1051 
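/* Same as gfx_v9_0_init_lbpw() but with the Vega20 threshold values. */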
1052 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1053 {
1054 	uint32_t data;
1055 
1056 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1057 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1058 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1059 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1060 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1061 
1062 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1063 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1064 
1065 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1066 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1067 
1068 	mutex_lock(&adev->grbm_idx_mutex);
1069 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1070 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1071 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1072 
1073 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1074 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1075 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1076 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1077 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1078 
1079 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1080 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1081 	data &= 0x0000FFFF;
1082 	data |= 0x00C00000;
1083 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1084 
1085 	/*
1086 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1087 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1088 	 */
1089 
1090 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1091 	 * but is used here for RLC_LB_CNTL configuration */
1092 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1093 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1094 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1095 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1096 	mutex_unlock(&adev->grbm_idx_mutex);
1097 
1098 	gfx_v9_0_init_always_on_cu_mask(adev);
1099 }
1100 
1101 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1102 {
1103 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1104 }
1105 
1106 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1107 {
1108 	return 5;
1109 }
1110 
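/*
 * Set up RLC resources: the clear-state buffer, the cp table on Raven,
 * and LBPW where applicable.
 */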
1111 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1112 {
1113 	const struct cs_section_def *cs_data;
1114 	int r;
1115 
1116 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1117 
1118 	cs_data = adev->gfx.rlc.cs_data;
1119 
1120 	if (cs_data) {
1121 		/* init clear state block */
1122 		r = amdgpu_gfx_rlc_init_csb(adev);
1123 		if (r)
1124 			return r;
1125 	}
1126 
1127 	if (adev->asic_type == CHIP_RAVEN) {
1128 		/* TODO: double check the cp_table_size for RV */
1129 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1130 		r = amdgpu_gfx_rlc_init_cpt(adev);
1131 		if (r)
1132 			return r;
1133 	}
1134 
1135 	switch (adev->asic_type) {
1136 	case CHIP_RAVEN:
1137 		gfx_v9_0_init_lbpw(adev);
1138 		break;
1139 	case CHIP_VEGA20:
1140 		gfx_v9_4_init_lbpw(adev);
1141 		break;
1142 	default:
1143 		break;
1144 	}
1145 
1146 	return 0;
1147 }
1148 
1149 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1150 {
1151 	int r;
1152 
1153 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1154 	if (unlikely(r != 0))
1155 		return r;
1156 
1157 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1158 			AMDGPU_GEM_DOMAIN_VRAM);
1159 	if (!r)
1160 		adev->gfx.rlc.clear_state_gpu_addr =
1161 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1162 
1163 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1164 
1165 	return r;
1166 }
1167 
1168 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1169 {
1170 	int r;
1171 
1172 	if (!adev->gfx.rlc.clear_state_obj)
1173 		return;
1174 
1175 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1176 	if (likely(r == 0)) {
1177 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1178 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1179 	}
1180 }
1181 
1182 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1183 {
1184 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1185 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1186 }
1187 
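/*
 * Allocate the HPD EOP buffer for the compute queues this driver owns
 * and a GTT buffer holding a copy of the MEC firmware image.
 */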
1188 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1189 {
1190 	int r;
1191 	u32 *hpd;
1192 	const __le32 *fw_data;
1193 	unsigned fw_size;
1194 	u32 *fw;
1195 	size_t mec_hpd_size;
1196 
1197 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1198 
1199 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1200 
1201 	/* take ownership of the relevant compute queues */
1202 	amdgpu_gfx_compute_queue_acquire(adev);
1203 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1204 
1205 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1206 				      AMDGPU_GEM_DOMAIN_VRAM,
1207 				      &adev->gfx.mec.hpd_eop_obj,
1208 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1209 				      (void **)&hpd);
1210 	if (r) {
1211 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1212 		gfx_v9_0_mec_fini(adev);
1213 		return r;
1214 	}
1215 
1216 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1217 
1218 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1219 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1220 
1221 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1222 
1223 	fw_data = (const __le32 *)
1224 		(adev->gfx.mec_fw->data +
1225 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1226 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1227 
1228 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1229 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1230 				      &adev->gfx.mec.mec_fw_obj,
1231 				      &adev->gfx.mec.mec_fw_gpu_addr,
1232 				      (void **)&fw);
1233 	if (r) {
1234 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1235 		gfx_v9_0_mec_fini(adev);
1236 		return r;
1237 	}
1238 
1239 	memcpy(fw, fw_data, fw_size);
1240 
1241 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1242 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1243 
1244 	return 0;
1245 }
1246 
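/* Read one dword of wave context via the SQ indirect register interface. */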
1247 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1248 {
1249 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1250 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1251 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1252 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1253 		(SQ_IND_INDEX__FORCE_READ_MASK));
1254 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1255 }
1256 
1257 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1258 			   uint32_t wave, uint32_t thread,
1259 			   uint32_t regno, uint32_t num, uint32_t *out)
1260 {
1261 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1262 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1263 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1264 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1265 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1266 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1267 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1268 	while (num--)
1269 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1270 }
1271 
1272 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1273 {
1274 	/* type 1 wave data */
1275 	dst[(*no_fields)++] = 1;
1276 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1277 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1278 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1279 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1280 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1281 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1282 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1283 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1284 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1285 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1286 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1287 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1288 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1289 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1290 }
1291 
1292 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1293 				     uint32_t wave, uint32_t start,
1294 				     uint32_t size, uint32_t *dst)
1295 {
1296 	wave_read_regs(
1297 		adev, simd, wave, 0,
1298 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1299 }
1300 
1301 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1302 				     uint32_t wave, uint32_t thread,
1303 				     uint32_t start, uint32_t size,
1304 				     uint32_t *dst)
1305 {
1306 	wave_read_regs(
1307 		adev, simd, wave, thread,
1308 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1309 }
1310 
1311 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1312 				  u32 me, u32 pipe, u32 q)
1313 {
1314 	soc15_grbm_select(adev, me, pipe, q, 0);
1315 }
1316 
1317 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1318 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1319 	.select_se_sh = &gfx_v9_0_select_se_sh,
1320 	.read_wave_data = &gfx_v9_0_read_wave_data,
1321 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1322 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1323 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1324 };
1325 
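/*
 * Fill in the per-ASIC gfx configuration: FIFO sizes, GB_ADDR_CONFIG
 * and the fields derived from it.
 */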
1326 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1327 {
1328 	u32 gb_addr_config;
1329 	int err;
1330 
1331 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1332 
1333 	switch (adev->asic_type) {
1334 	case CHIP_VEGA10:
1335 		adev->gfx.config.max_hw_contexts = 8;
1336 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1340 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1341 		break;
1342 	case CHIP_VEGA12:
1343 		adev->gfx.config.max_hw_contexts = 8;
1344 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1345 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1346 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1347 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1348 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1349 		DRM_INFO("fix gfx.config for vega12\n");
1350 		break;
1351 	case CHIP_VEGA20:
1352 		adev->gfx.config.max_hw_contexts = 8;
1353 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1354 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1355 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1356 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1357 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1358 		gb_addr_config &= ~0xf3e777ff;
1359 		gb_addr_config |= 0x22014042;
1360 		/* check vbios table if gpu info is not available */
1361 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1362 		if (err)
1363 			return err;
1364 		break;
1365 	case CHIP_RAVEN:
1366 		adev->gfx.config.max_hw_contexts = 8;
1367 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1368 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1369 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1370 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1371 		if (adev->rev_id >= 8)
1372 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1373 		else
1374 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1375 		break;
1376 	default:
1377 		BUG();
1378 		break;
1379 	}
1380 
1381 	adev->gfx.config.gb_addr_config = gb_addr_config;
1382 
1383 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1384 			REG_GET_FIELD(
1385 					adev->gfx.config.gb_addr_config,
1386 					GB_ADDR_CONFIG,
1387 					NUM_PIPES);
1388 
1389 	adev->gfx.config.max_tile_pipes =
1390 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1391 
1392 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1393 			REG_GET_FIELD(
1394 					adev->gfx.config.gb_addr_config,
1395 					GB_ADDR_CONFIG,
1396 					NUM_BANKS);
1397 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1398 			REG_GET_FIELD(
1399 					adev->gfx.config.gb_addr_config,
1400 					GB_ADDR_CONFIG,
1401 					MAX_COMPRESSED_FRAGS);
1402 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1403 			REG_GET_FIELD(
1404 					adev->gfx.config.gb_addr_config,
1405 					GB_ADDR_CONFIG,
1406 					NUM_RB_PER_SE);
1407 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1408 			REG_GET_FIELD(
1409 					adev->gfx.config.gb_addr_config,
1410 					GB_ADDR_CONFIG,
1411 					NUM_SHADER_ENGINES);
1412 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1413 			REG_GET_FIELD(
1414 					adev->gfx.config.gb_addr_config,
1415 					GB_ADDR_CONFIG,
1416 					PIPE_INTERLEAVE_SIZE));
1417 
1418 	return 0;
1419 }
1420 
1421 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1422 				   struct amdgpu_ngg_buf *ngg_buf,
1423 				   int size_se,
1424 				   int default_size_se)
1425 {
1426 	int r;
1427 
1428 	if (size_se < 0) {
1429 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1430 		return -EINVAL;
1431 	}
1432 	size_se = size_se ? size_se : default_size_se;
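	/* a zero per-SE size from the module parameter falls back to the
	 * default; the total allocation covers every shader engine */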
1433 
1434 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1435 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1436 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1437 				    &ngg_buf->bo,
1438 				    &ngg_buf->gpu_addr,
1439 				    NULL);
1440 	if (r) {
1441 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1442 		return r;
1443 	}
1444 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1445 
1446 	return r;
1447 }
1448 
1449 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1450 {
1451 	int i;
1452 
1453 	for (i = 0; i < NGG_BUF_MAX; i++)
1454 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1455 				      &adev->gfx.ngg.buf[i].gpu_addr,
1456 				      NULL);
1457 
1458 	memset(&adev->gfx.ngg.buf[0], 0,
1459 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1460 
1461 	adev->gfx.ngg.init = false;
1462 
1463 	return 0;
1464 }
1465 
1466 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1467 {
1468 	int r;
1469 
1470 	if (!amdgpu_ngg || adev->gfx.ngg.init)
1471 		return 0;
1472 
1473 	/* GDS reserved memory: 64-byte alignment */
1474 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1475 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1476 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1477 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
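	/* editor's note: the reserved window appears to be placed right past the
	 * VMID0 base + size read back from the GDS registers above */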
1478 
1479 	/* Primitive Buffer */
1480 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1481 				    amdgpu_prim_buf_per_se,
1482 				    64 * 1024);
1483 	if (r) {
1484 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1485 		goto err;
1486 	}
1487 
1488 	/* Position Buffer */
1489 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1490 				    amdgpu_pos_buf_per_se,
1491 				    256 * 1024);
1492 	if (r) {
1493 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1494 		goto err;
1495 	}
1496 
1497 	/* Control Sideband */
1498 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1499 				    amdgpu_cntl_sb_buf_per_se,
1500 				    256);
1501 	if (r) {
1502 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1503 		goto err;
1504 	}
1505 
1506 	/* Parameter Cache, not created by default */
1507 	if (amdgpu_param_buf_per_se <= 0)
1508 		goto out;
1509 
1510 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1511 				    amdgpu_param_buf_per_se,
1512 				    512 * 1024);
1513 	if (r) {
1514 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1515 		goto err;
1516 	}
1517 
1518 out:
1519 	adev->gfx.ngg.init = true;
1520 	return 0;
1521 err:
1522 	gfx_v9_0_ngg_fini(adev);
1523 	return r;
1524 }
1525 
1526 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1527 {
1528 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1529 	int r;
1530 	u32 data, base;
1531 
1532 	if (!amdgpu_ngg)
1533 		return 0;
1534 
1535 	/* Program buffer size */
1536 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1537 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1538 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1539 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1540 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1541 
1542 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1543 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1544 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1545 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1546 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
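	/* note: the >> 8 / >> 10 shifts above suggest sizes are programmed in
	 * 256-byte units (1024-byte units for the parameter cache) */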
1547 
1548 	/* Program buffer base address */
1549 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1550 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1551 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1552 
1553 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1555 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1556 
1557 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1558 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1559 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1560 
1561 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1563 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1564 
1565 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1566 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1567 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1568 
1569 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1571 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1572 
1573 	/* Clear GDS reserved memory */
1574 	r = amdgpu_ring_alloc(ring, 17);
1575 	if (r) {
1576 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1577 			  ring->name, r);
1578 		return r;
1579 	}
1580 
1581 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1582 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1583 			           (adev->gds.gds_size +
1584 				    adev->gfx.ngg.gds_reserve_size));
1585 
1586 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1587 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1588 				PACKET3_DMA_DATA_DST_SEL(1) |
1589 				PACKET3_DMA_DATA_SRC_SEL(2)));
1590 	amdgpu_ring_write(ring, 0);
1591 	amdgpu_ring_write(ring, 0);
1592 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1593 	amdgpu_ring_write(ring, 0);
1594 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1595 				adev->gfx.ngg.gds_reserve_size);
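	/* DST_SEL(1) and SRC_SEL(2) presumably select GDS as destination and
	 * inline packet data as source, so this clears gds_reserve_size bytes
	 * of GDS at gds_reserve_addr */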
1596 
1597 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1598 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1599 
1600 	amdgpu_ring_commit(ring);
1601 
1602 	return 0;
1603 }
1604 
1605 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1606 				      int mec, int pipe, int queue)
1607 {
1608 	int r;
1609 	unsigned irq_type;
1610 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1611 
1613 
1614 	/* mec0 is me1 */
1615 	ring->me = mec + 1;
1616 	ring->pipe = pipe;
1617 	ring->queue = queue;
1618 
1619 	ring->ring_obj = NULL;
1620 	ring->use_doorbell = true;
1621 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1622 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1623 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1624 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1625 
1626 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1627 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1628 		+ ring->pipe;
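	/* each MEC pipe has its own EOP interrupt source; queues on the same
	 * pipe share it, so irq_type depends only on me and pipe */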
1629 
1630 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1631 	r = amdgpu_ring_init(adev, ring, 1024,
1632 			     &adev->gfx.eop_irq, irq_type);
1633 	if (r)
1634 		return r;
1635 
1637 	return 0;
1638 }
1639 
1640 static int gfx_v9_0_sw_init(void *handle)
1641 {
1642 	int i, j, k, r, ring_id;
1643 	struct amdgpu_ring *ring;
1644 	struct amdgpu_kiq *kiq;
1645 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646 
1647 	switch (adev->asic_type) {
1648 	case CHIP_VEGA10:
1649 	case CHIP_VEGA12:
1650 	case CHIP_VEGA20:
1651 	case CHIP_RAVEN:
1652 		adev->gfx.mec.num_mec = 2;
1653 		break;
1654 	default:
1655 		adev->gfx.mec.num_mec = 1;
1656 		break;
1657 	}
1658 
1659 	adev->gfx.mec.num_pipe_per_mec = 4;
1660 	adev->gfx.mec.num_queue_per_pipe = 8;
1661 
1662 	/* EOP Event */
1663 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1664 	if (r)
1665 		return r;
1666 
1667 	/* Privileged reg */
1668 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1669 			      &adev->gfx.priv_reg_irq);
1670 	if (r)
1671 		return r;
1672 
1673 	/* Privileged inst */
1674 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1675 			      &adev->gfx.priv_inst_irq);
1676 	if (r)
1677 		return r;
1678 
1679 	/* ECC error */
1680 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1681 			      &adev->gfx.cp_ecc_error_irq);
1682 	if (r)
1683 		return r;
1684 
1685 	/* FUE error */
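	/* note: routed to the same cp_ecc_error_irq handler as the ECC error above */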
1686 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1687 			      &adev->gfx.cp_ecc_error_irq);
1688 	if (r)
1689 		return r;
1690 
1691 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1692 
1693 	gfx_v9_0_scratch_init(adev);
1694 
1695 	r = gfx_v9_0_init_microcode(adev);
1696 	if (r) {
1697 		DRM_ERROR("Failed to load gfx firmware!\n");
1698 		return r;
1699 	}
1700 
1701 	r = adev->gfx.rlc.funcs->init(adev);
1702 	if (r) {
1703 		DRM_ERROR("Failed to init rlc BOs!\n");
1704 		return r;
1705 	}
1706 
1707 	r = gfx_v9_0_mec_init(adev);
1708 	if (r) {
1709 		DRM_ERROR("Failed to init MEC BOs!\n");
1710 		return r;
1711 	}
1712 
1713 	/* set up the gfx ring */
1714 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1715 		ring = &adev->gfx.gfx_ring[i];
1716 		ring->ring_obj = NULL;
1717 		if (!i)
1718 			sprintf(ring->name, "gfx");
1719 		else
1720 			sprintf(ring->name, "gfx_%d", i);
1721 		ring->use_doorbell = true;
1722 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1723 		r = amdgpu_ring_init(adev, ring, 1024,
1724 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1725 		if (r)
1726 			return r;
1727 	}
1728 
1729 	/* set up the compute queues - allocate horizontally across pipes */
1730 	ring_id = 0;
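	/* pipe (k) is the innermost loop while queue (j) is held, so consecutive
	 * ring_ids land on different pipes before a pipe gets a second queue */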
1731 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1732 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1733 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1734 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1735 					continue;
1736 
1737 				r = gfx_v9_0_compute_ring_init(adev,
1738 							       ring_id,
1739 							       i, k, j);
1740 				if (r)
1741 					return r;
1742 
1743 				ring_id++;
1744 			}
1745 		}
1746 	}
1747 
1748 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1749 	if (r) {
1750 		DRM_ERROR("Failed to init KIQ BOs!\n");
1751 		return r;
1752 	}
1753 
1754 	kiq = &adev->gfx.kiq;
1755 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1756 	if (r)
1757 		return r;
1758 
1759 	/* create MQD for all compute queues as well as KIQ for the SRIOV case */
1760 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1761 	if (r)
1762 		return r;
1763 
1764 	adev->gfx.ce_ram_size = 0x8000;
1765 
1766 	r = gfx_v9_0_gpu_early_init(adev);
1767 	if (r)
1768 		return r;
1769 
1770 	r = gfx_v9_0_ngg_init(adev);
1771 	if (r)
1772 		return r;
1773 
1774 	return 0;
1775 }
1776 
1778 static int gfx_v9_0_sw_fini(void *handle)
1779 {
1780 	int i;
1781 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1782 
1783 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1784 			adev->gfx.ras_if) {
1785 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1786 		struct ras_ih_if ih_info = {
1787 			.head = *ras_if,
1788 		};
1789 
1790 		amdgpu_ras_debugfs_remove(adev, ras_if);
1791 		amdgpu_ras_sysfs_remove(adev, ras_if);
1792 		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1793 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1794 		kfree(ras_if);
1795 	}
1796 
1797 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1798 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1799 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1800 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1801 
1802 	amdgpu_gfx_mqd_sw_fini(adev);
1803 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1804 	amdgpu_gfx_kiq_fini(adev);
1805 
1806 	gfx_v9_0_mec_fini(adev);
1807 	gfx_v9_0_ngg_fini(adev);
1808 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1809 	if (adev->asic_type == CHIP_RAVEN) {
1810 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1811 				&adev->gfx.rlc.cp_table_gpu_addr,
1812 				(void **)&adev->gfx.rlc.cp_table_ptr);
1813 	}
1814 	gfx_v9_0_free_microcode(adev);
1815 
1816 	return 0;
1817 }
1818 
1820 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1821 {
1822 	/* TODO */
1823 }
1824 
1825 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1826 {
1827 	u32 data;
1828 
1829 	if (instance == 0xffffffff)
1830 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1831 	else
1832 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1833 
1834 	if (se_num == 0xffffffff)
1835 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1836 	else
1837 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1838 
1839 	if (sh_num == 0xffffffff)
1840 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1841 	else
1842 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1843 
1844 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1845 }
1846 
1847 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1848 {
1849 	u32 data, mask;
1850 
1851 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1852 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1853 
1854 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1855 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1856 
1857 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1858 					 adev->gfx.config.max_sh_per_se);
1859 
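	/* the fused (CC) and user (GC_USER) disable bits were OR'd above;
	 * invert to report which RBs are active within this SH */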
1860 	return (~data) & mask;
1861 }
1862 
1863 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1864 {
1865 	int i, j;
1866 	u32 data;
1867 	u32 active_rbs = 0;
1868 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1869 					adev->gfx.config.max_sh_per_se;
1870 
1871 	mutex_lock(&adev->grbm_idx_mutex);
1872 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1873 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1874 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1875 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1876 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1877 					       rb_bitmap_width_per_sh);
1878 		}
1879 	}
1880 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1881 	mutex_unlock(&adev->grbm_idx_mutex);
1882 
1883 	adev->gfx.config.backend_enable_mask = active_rbs;
1884 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1885 }
1886 
1887 #define DEFAULT_SH_MEM_BASES	(0x6000)
1888 #define FIRST_COMPUTE_VMID	(8)
1889 #define LAST_COMPUTE_VMID	(16)
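/* VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 (8..15) are treated as
 * compute VMIDs and get the aperture setup below */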
1890 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1891 {
1892 	int i;
1893 	uint32_t sh_mem_config;
1894 	uint32_t sh_mem_bases;
1895 
1896 	/*
1897 	 * Configure apertures:
1898 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1899 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1900 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1901 	 */
1902 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1903 
1904 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1905 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1906 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1907 
1908 	mutex_lock(&adev->srbm_mutex);
1909 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1910 		soc15_grbm_select(adev, 0, 0, 0, i);
1911 		/* CP and shaders */
1912 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1913 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1914 	}
1915 	soc15_grbm_select(adev, 0, 0, 0, 0);
1916 	mutex_unlock(&adev->srbm_mutex);
1917 }
1918 
1919 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1920 {
1921 	u32 tmp;
1922 	int i;
1923 
1924 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1925 
1926 	gfx_v9_0_tiling_mode_table_init(adev);
1927 
1928 	gfx_v9_0_setup_rb(adev);
1929 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1930 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1931 
1932 	/* XXX SH_MEM regs */
1933 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1934 	mutex_lock(&adev->srbm_mutex);
1935 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1936 		soc15_grbm_select(adev, 0, 0, 0, i);
1937 		/* CP and shaders */
1938 		if (i == 0) {
1939 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1940 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1941 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1942 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1943 		} else {
1944 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1945 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1946 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1947 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1948 				(adev->gmc.private_aperture_start >> 48));
1949 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1950 				(adev->gmc.shared_aperture_start >> 48));
1951 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1952 		}
1953 	}
1954 	soc15_grbm_select(adev, 0, 0, 0, 0);
1955 
1956 	mutex_unlock(&adev->srbm_mutex);
1957 
1958 	gfx_v9_0_init_compute_vmid(adev);
1959 
1960 	mutex_lock(&adev->grbm_idx_mutex);
1961 	/*
1962 	 * make sure that the following register writes are broadcast
1963 	 * to all the shaders
1964 	 */
1965 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1966 
1967 	WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1968 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
1969 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1970 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
1971 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1972 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
1973 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1974 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1975 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1976 	mutex_unlock(&adev->grbm_idx_mutex);
1977 
1978 }
1979 
1980 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1981 {
1982 	u32 i, j, k;
1983 	u32 mask;
1984 
1985 	mutex_lock(&adev->grbm_idx_mutex);
1986 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1987 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1988 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1989 			for (k = 0; k < adev->usec_timeout; k++) {
1990 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1991 					break;
1992 				udelay(1);
1993 			}
1994 			if (k == adev->usec_timeout) {
1995 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1996 						      0xffffffff, 0xffffffff);
1997 				mutex_unlock(&adev->grbm_idx_mutex);
1998 				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1999 					 i, j);
2000 				return;
2001 			}
2002 		}
2003 	}
2004 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2005 	mutex_unlock(&adev->grbm_idx_mutex);
2006 
2007 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2008 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2009 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2010 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2011 	for (k = 0; k < adev->usec_timeout; k++) {
2012 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2013 			break;
2014 		udelay(1);
2015 	}
2016 }
2017 
2018 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2019 					       bool enable)
2020 {
2021 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2022 
2023 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2024 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2025 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2026 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2027 
2028 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2029 }
2030 
2031 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2032 {
2033 	/* csib */
2034 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2035 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2036 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2037 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2038 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2039 			adev->gfx.rlc.clear_state_size);
2040 }
2041 
2042 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2043 				int indirect_offset,
2044 				int list_size,
2045 				int *unique_indirect_regs,
2046 				int unique_indirect_reg_count,
2047 				int *indirect_start_offsets,
2048 				int *indirect_start_offsets_count,
2049 				int max_start_offsets_count)
2050 {
2051 	int idx;
2052 
2053 	for (; indirect_offset < list_size; indirect_offset++) {
2054 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2055 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2056 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2057 
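		/* editor's note: each indirect block appears to consist of 3-dword
		 * entries (the third dword naming an indirect register),
		 * terminated by 0xFFFFFFFF */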
2058 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2059 			indirect_offset += 2;
2060 
2061 			/* look for the matching index */
2062 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2063 				if (unique_indirect_regs[idx] ==
2064 					register_list_format[indirect_offset] ||
2065 					!unique_indirect_regs[idx])
2066 					break;
2067 			}
2068 
2069 			BUG_ON(idx >= unique_indirect_reg_count);
2070 
2071 			if (!unique_indirect_regs[idx])
2072 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2073 
2074 			indirect_offset++;
2075 		}
2076 	}
2077 }
2078 
2079 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2080 {
2081 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2082 	int unique_indirect_reg_count = 0;
2083 
2084 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2085 	int indirect_start_offsets_count = 0;
2086 
2087 	int list_size = 0;
2088 	int i = 0, j = 0;
2089 	u32 tmp = 0;
2090 
2091 	u32 *register_list_format =
2092 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2093 	if (!register_list_format)
2094 		return -ENOMEM;
2095 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2096 		adev->gfx.rlc.reg_list_format_size_bytes);
2097 
2098 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2099 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2100 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2101 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2102 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2103 				    unique_indirect_regs,
2104 				    unique_indirect_reg_count,
2105 				    indirect_start_offsets,
2106 				    &indirect_start_offsets_count,
2107 				    ARRAY_SIZE(indirect_start_offsets));
2108 
2109 	/* enable auto inc in case it is disabled */
2110 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2111 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2112 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2113 
2114 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2115 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2116 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2117 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2118 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2119 			adev->gfx.rlc.register_restore[i]);
2120 
2121 	/* load indirect register */
2122 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2123 		adev->gfx.rlc.reg_list_format_start);
2124 
2125 	/* direct register portion */
2126 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2127 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2128 			register_list_format[i]);
2129 
2130 	/* indirect register portion */
2131 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2132 		if (register_list_format[i] == 0xFFFFFFFF) {
2133 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2134 			continue;
2135 		}
2136 
2137 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2138 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2139 
2140 		for (j = 0; j < unique_indirect_reg_count; j++) {
2141 			if (register_list_format[i] == unique_indirect_regs[j]) {
2142 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2143 				break;
2144 			}
2145 		}
2146 
2147 		BUG_ON(j >= unique_indirect_reg_count);
2148 
2149 		i++;
2150 	}
2151 
2152 	/* set save/restore list size */
2153 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2154 	list_size = list_size >> 1;
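	/* halved presumably because the list is stored as (offset, value) pairs */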
2155 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2156 		adev->gfx.rlc.reg_restore_list_size);
2157 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2158 
2159 	/* write the starting offsets to RLC scratch ram */
2160 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2161 		adev->gfx.rlc.starting_offsets_start);
2162 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2163 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2164 		       indirect_start_offsets[i]);
2165 
2166 	/* load unique indirect regs*/
2167 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2168 		if (unique_indirect_regs[i] != 0) {
2169 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2170 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2171 			       unique_indirect_regs[i] & 0x3FFFF);
2172 
2173 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2174 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2175 			       unique_indirect_regs[i] >> 20);
2176 		}
2177 	}
2178 
2179 	kfree(register_list_format);
2180 	return 0;
2181 }
2182 
2183 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2184 {
2185 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2186 }
2187 
2188 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2189 					     bool enable)
2190 {
2191 	uint32_t data = 0;
2192 	uint32_t default_data = 0;
2193 
2194 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2195 	if (enable) {
2196 		/* enable GFXIP control over CGPG */
2197 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2198 		if (default_data != data)
2199 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2200 
2201 		/* update status */
2202 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2203 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2204 		if (default_data != data)
2205 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2206 	} else {
2207 		/* restore GFXIP control over CGPG */
2208 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2209 		if (default_data != data)
2210 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2211 	}
2212 }
2213 
2214 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2215 {
2216 	uint32_t data = 0;
2217 
2218 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2219 			      AMD_PG_SUPPORT_GFX_SMG |
2220 			      AMD_PG_SUPPORT_GFX_DMG)) {
2221 		/* init IDLE_POLL_COUNT = 60 */
2222 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2223 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2224 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2225 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2226 
2227 		/* init RLC PG Delay */
2228 		data = 0;
2229 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2230 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2231 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2232 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2233 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2234 
2235 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2236 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2237 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2238 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2239 
2240 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2241 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2242 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2243 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2244 
2245 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2246 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2247 
2248 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2249 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2250 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2251 
2252 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2253 	}
2254 }
2255 
2256 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2257 						bool enable)
2258 {
2259 	uint32_t data = 0;
2260 	uint32_t default_data = 0;
2261 
2262 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2263 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2264 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2265 			     enable ? 1 : 0);
2266 	if (default_data != data)
2267 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2268 }
2269 
2270 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2271 						bool enable)
2272 {
2273 	uint32_t data = 0;
2274 	uint32_t default_data = 0;
2275 
2276 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2277 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2278 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2279 			     enable ? 1 : 0);
2280 	if (default_data != data)
2281 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2282 }
2283 
2284 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2285 					bool enable)
2286 {
2287 	uint32_t data = 0;
2288 	uint32_t default_data = 0;
2289 
2290 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2291 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2292 			     CP_PG_DISABLE,
2293 			     enable ? 0 : 1);
2294 	if (default_data != data)
2295 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2296 }
2297 
2298 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2299 						bool enable)
2300 {
2301 	uint32_t data, default_data;
2302 
2303 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2304 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2305 			     GFX_POWER_GATING_ENABLE,
2306 			     enable ? 1 : 0);
2307 	if (default_data != data)
2308 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2309 }
2310 
2311 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2312 						bool enable)
2313 {
2314 	uint32_t data, default_data;
2315 
2316 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2317 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2318 			     GFX_PIPELINE_PG_ENABLE,
2319 			     enable ? 1 : 0);
2320 	if (default_data != data)
2321 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2322 
2323 	if (!enable)
2324 		/* read any GFX register to wake up GFX */
2325 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2326 }
2327 
2328 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2329 						       bool enable)
2330 {
2331 	uint32_t data, default_data;
2332 
2333 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2334 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2335 			     STATIC_PER_CU_PG_ENABLE,
2336 			     enable ? 1 : 0);
2337 	if (default_data != data)
2338 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2339 }
2340 
2341 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2342 						bool enable)
2343 {
2344 	uint32_t data, default_data;
2345 
2346 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2347 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2348 			     DYN_PER_CU_PG_ENABLE,
2349 			     enable ? 1 : 0);
2350 	if (default_data != data)
2351 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2352 }
2353 
2354 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2355 {
2356 	gfx_v9_0_init_csb(adev);
2357 
2358 	/*
2359 	 * The RLC save/restore list is supported since RLC v2_1
2360 	 * and is needed by the gfxoff feature.
2361 	 */
2362 	if (adev->gfx.rlc.is_rlc_v2_1) {
2363 		gfx_v9_1_init_rlc_save_restore_list(adev);
2364 		gfx_v9_0_enable_save_restore_machine(adev);
2365 	}
2366 
2367 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2368 			      AMD_PG_SUPPORT_GFX_SMG |
2369 			      AMD_PG_SUPPORT_GFX_DMG |
2370 			      AMD_PG_SUPPORT_CP |
2371 			      AMD_PG_SUPPORT_GDS |
2372 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2373 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2374 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2375 		gfx_v9_0_init_gfx_power_gating(adev);
2376 	}
2377 }
2378 
2379 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2380 {
2381 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2382 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2383 	gfx_v9_0_wait_for_rlc_serdes(adev);
2384 }
2385 
2386 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2387 {
2388 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2389 	udelay(50);
2390 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2391 	udelay(50);
2392 }
2393 
2394 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2395 {
2396 #ifdef AMDGPU_RLC_DEBUG_RETRY
2397 	u32 rlc_ucode_ver;
2398 #endif
2399 
2400 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2401 	udelay(50);
2402 
2403 	/* on APUs (e.g. carrizo) the cp interrupt is enabled only after the cp is initialized */
2404 	if (!(adev->flags & AMD_IS_APU)) {
2405 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2406 		udelay(50);
2407 	}
2408 
2409 #ifdef AMDGPU_RLC_DEBUG_RETRY
2410 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2411 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2412 	if (rlc_ucode_ver == 0x108) {
2413 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2414 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2415 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2416 		 * default is 0x9C4 to create a 100us interval */
2417 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2418 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2419 		 * to disable the page fault retry interrupts, default is
2420 		 * 0x100 (256) */
2421 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2422 	}
2423 #endif
2424 }
2425 
2426 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2427 {
2428 	const struct rlc_firmware_header_v2_0 *hdr;
2429 	const __le32 *fw_data;
2430 	unsigned i, fw_size;
2431 
2432 	if (!adev->gfx.rlc_fw)
2433 		return -EINVAL;
2434 
2435 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2436 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2437 
2438 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2439 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2440 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2441 
2442 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2443 			RLCG_UCODE_LOADING_START_ADDRESS);
2444 	for (i = 0; i < fw_size; i++)
2445 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
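	/* the address register is finally loaded with the firmware version,
	 * apparently as a marker that the upload is complete */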
2446 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2447 
2448 	return 0;
2449 }
2450 
2451 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2452 {
2453 	int r;
2454 
2455 	if (amdgpu_sriov_vf(adev)) {
2456 		gfx_v9_0_init_csb(adev);
2457 		return 0;
2458 	}
2459 
2460 	adev->gfx.rlc.funcs->stop(adev);
2461 
2462 	/* disable CG */
2463 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2464 
2465 	gfx_v9_0_init_pg(adev);
2466 
2467 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2468 		/* legacy rlc firmware loading */
2469 		r = gfx_v9_0_rlc_load_microcode(adev);
2470 		if (r)
2471 			return r;
2472 	}
2473 
2474 	switch (adev->asic_type) {
2475 	case CHIP_RAVEN:
2476 		if (amdgpu_lbpw == 0)
2477 			gfx_v9_0_enable_lbpw(adev, false);
2478 		else
2479 			gfx_v9_0_enable_lbpw(adev, true);
2480 		break;
2481 	case CHIP_VEGA20:
2482 		if (amdgpu_lbpw > 0)
2483 			gfx_v9_0_enable_lbpw(adev, true);
2484 		else
2485 			gfx_v9_0_enable_lbpw(adev, false);
2486 		break;
2487 	default:
2488 		break;
2489 	}
2490 
2491 	adev->gfx.rlc.funcs->start(adev);
2492 
2493 	return 0;
2494 }
2495 
2496 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2497 {
2498 	int i;
2499 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2500 
2501 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2502 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2503 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2504 	if (!enable) {
2505 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2506 			adev->gfx.gfx_ring[i].sched.ready = false;
2507 	}
2508 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2509 	udelay(50);
2510 }
2511 
2512 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2513 {
2514 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2515 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2516 	const struct gfx_firmware_header_v1_0 *me_hdr;
2517 	const __le32 *fw_data;
2518 	unsigned i, fw_size;
2519 
2520 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2521 		return -EINVAL;
2522 
2523 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2524 		adev->gfx.pfp_fw->data;
2525 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2526 		adev->gfx.ce_fw->data;
2527 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2528 		adev->gfx.me_fw->data;
2529 
2530 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2531 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2532 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2533 
2534 	gfx_v9_0_cp_gfx_enable(adev, false);
2535 
2536 	/* PFP */
2537 	fw_data = (const __le32 *)
2538 		(adev->gfx.pfp_fw->data +
2539 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2540 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2541 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2542 	for (i = 0; i < fw_size; i++)
2543 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2544 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2545 
2546 	/* CE */
2547 	fw_data = (const __le32 *)
2548 		(adev->gfx.ce_fw->data +
2549 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2550 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2551 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2552 	for (i = 0; i < fw_size; i++)
2553 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2554 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2555 
2556 	/* ME */
2557 	fw_data = (const __le32 *)
2558 		(adev->gfx.me_fw->data +
2559 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2560 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2561 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2562 	for (i = 0; i < fw_size; i++)
2563 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2564 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2565 
2566 	return 0;
2567 }
2568 
2569 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2570 {
2571 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2572 	const struct cs_section_def *sect = NULL;
2573 	const struct cs_extent_def *ext = NULL;
2574 	int r, i, tmp;
2575 
2576 	/* init the CP */
2577 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2578 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2579 
2580 	gfx_v9_0_cp_gfx_enable(adev, true);
2581 
2582 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2583 	if (r) {
2584 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2585 		return r;
2586 	}
2587 
2588 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2589 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2590 
2591 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2592 	amdgpu_ring_write(ring, 0x80000000);
2593 	amdgpu_ring_write(ring, 0x80000000);
2594 
2595 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2596 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2597 			if (sect->id == SECT_CONTEXT) {
2598 				amdgpu_ring_write(ring,
2599 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2600 					       ext->reg_count));
2601 				amdgpu_ring_write(ring,
2602 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2603 				for (i = 0; i < ext->reg_count; i++)
2604 					amdgpu_ring_write(ring, ext->extent[i]);
2605 			}
2606 		}
2607 	}
2608 
2609 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2610 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2611 
2612 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2613 	amdgpu_ring_write(ring, 0);
2614 
2615 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2616 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2617 	amdgpu_ring_write(ring, 0x8000);
2618 	amdgpu_ring_write(ring, 0x8000);
2619 
2620 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2621 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2622 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2623 	amdgpu_ring_write(ring, tmp);
2624 	amdgpu_ring_write(ring, 0);
2625 
2626 	amdgpu_ring_commit(ring);
2627 
2628 	return 0;
2629 }
2630 
2631 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2632 {
2633 	struct amdgpu_ring *ring;
2634 	u32 tmp;
2635 	u32 rb_bufsz;
2636 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2637 
2638 	/* Set the write pointer delay */
2639 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2640 
2641 	/* set the RB to use vmid 0 */
2642 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2643 
2644 	/* Set ring buffer size */
2645 	ring = &adev->gfx.gfx_ring[0];
2646 	rb_bufsz = order_base_2(ring->ring_size / 8);
2647 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2648 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2649 #ifdef __BIG_ENDIAN
2650 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2651 #endif
2652 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2653 
2654 	/* Initialize the ring buffer's write pointers */
2655 	ring->wptr = 0;
2656 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2657 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2658 
2659 	/* set the wb address whether it's enabled or not */
2660 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2661 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2662 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2663 
2664 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2665 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2666 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2667 
2668 	mdelay(1);
2669 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2670 
2671 	rb_addr = ring->gpu_addr >> 8;
2672 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2673 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2674 
2675 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2676 	if (ring->use_doorbell) {
2677 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2678 				    DOORBELL_OFFSET, ring->doorbell_index);
2679 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2680 				    DOORBELL_EN, 1);
2681 	} else {
2682 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2683 	}
2684 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2685 
2686 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2687 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2688 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2689 
2690 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2691 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2692 
2694 	/* start the ring */
2695 	gfx_v9_0_cp_gfx_start(adev);
2696 	ring->sched.ready = true;
2697 
2698 	return 0;
2699 }
2700 
2701 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2702 {
2703 	int i;
2704 
2705 	if (enable) {
2706 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2707 	} else {
2708 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2709 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2710 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2711 			adev->gfx.compute_ring[i].sched.ready = false;
2712 		adev->gfx.kiq.ring.sched.ready = false;
2713 	}
2714 	udelay(50);
2715 }
2716 
2717 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2718 {
2719 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2720 	const __le32 *fw_data;
2721 	unsigned i;
2722 	u32 tmp;
2723 
2724 	if (!adev->gfx.mec_fw)
2725 		return -EINVAL;
2726 
2727 	gfx_v9_0_cp_compute_enable(adev, false);
2728 
2729 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2730 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2731 
2732 	fw_data = (const __le32 *)
2733 		(adev->gfx.mec_fw->data +
2734 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2735 	tmp = 0;
2736 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2737 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2738 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2739 
2740 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2741 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2742 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2743 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
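	/* note: only the jump table is streamed through the UCODE port below;
	 * the bulk of the MEC firmware is presumably fetched via the
	 * instruction-cache base programmed above */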
2744 
2745 	/* MEC1 */
2746 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2747 			 mec_hdr->jt_offset);
2748 	for (i = 0; i < mec_hdr->jt_size; i++)
2749 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2750 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2751 
2752 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2753 			adev->gfx.mec_fw_version);
2754 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2755 
2756 	return 0;
2757 }
2758 
2759 /* KIQ functions */
2760 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2761 {
2762 	uint32_t tmp;
2763 	struct amdgpu_device *adev = ring->adev;
2764 
2765 	/* tell the RLC which queue is the KIQ */
2766 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2767 	tmp &= 0xffffff00;
2768 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2769 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
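	/* a second write follows with bit 7 set, presumably marking the
	 * scheduler entry as valid/active */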
2770 	tmp |= 0x80;
2771 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2772 }
2773 
2774 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2775 {
2776 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2777 	uint64_t queue_mask = 0;
2778 	int r, i;
2779 
2780 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2781 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2782 			continue;
2783 
2784 		/* This situation may be hit in the future if a new HW
2785 		 * generation exposes more than 64 queues. If so, the
2786 		 * definition of queue_mask needs updating */
2787 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2788 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2789 			break;
2790 		}
2791 
2792 		queue_mask |= (1ull << i);
2793 	}
2794 
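	/* ring space: 8 dwords for the SET_RESOURCES packet plus 7 dwords per
	 * MAP_QUEUES packet emitted below */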
2795 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2796 	if (r) {
2797 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2798 		return r;
2799 	}
2800 
2801 	/* set resources */
2802 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2803 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2804 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2805 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2806 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2807 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2808 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2809 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2810 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2811 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2812 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2813 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2814 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2815 
2816 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2817 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2818 		amdgpu_ring_write(kiq_ring,
2819 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2820 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2821 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2822 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2823 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2824 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2825 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2826 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2827 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2828 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2829 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2830 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2831 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2832 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2833 	}
2834 
2835 	r = amdgpu_ring_test_helper(kiq_ring);
2836 	if (r)
2837 		DRM_ERROR("KCQ enable failed\n");
2838 
2839 	return r;
2840 }
2841 
2842 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2843 {
2844 	struct amdgpu_device *adev = ring->adev;
2845 	struct v9_mqd *mqd = ring->mqd_ptr;
2846 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2847 	uint32_t tmp;
2848 
2849 	mqd->header = 0xC0310800;
2850 	mqd->compute_pipelinestat_enable = 0x00000001;
2851 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2852 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2853 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2854 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2855 	mqd->compute_misc_reserved = 0x00000003;
2856 
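	/* the dynamic CU mask lives in the same BO directly after the MQD
	 * (see struct v9_mqd_allocation); program its GPU address here */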
2857 	mqd->dynamic_cu_mask_addr_lo =
2858 		lower_32_bits(ring->mqd_gpu_addr
2859 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860 	mqd->dynamic_cu_mask_addr_hi =
2861 		upper_32_bits(ring->mqd_gpu_addr
2862 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2863 
2864 	eop_base_addr = ring->eop_gpu_addr >> 8;
2865 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2866 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2867 
2868 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2869 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2870 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2871 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2872 
2873 	mqd->cp_hqd_eop_control = tmp;
2874 
2875 	/* enable doorbell? */
2876 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2877 
2878 	if (ring->use_doorbell) {
2879 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2880 				    DOORBELL_OFFSET, ring->doorbell_index);
2881 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2882 				    DOORBELL_EN, 1);
2883 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2884 				    DOORBELL_SOURCE, 0);
2885 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886 				    DOORBELL_HIT, 0);
2887 	} else {
2888 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2889 					 DOORBELL_EN, 0);
2890 	}
2891 
2892 	mqd->cp_hqd_pq_doorbell_control = tmp;
2893 
2894 	/* disable the queue if it's active */
2895 	ring->wptr = 0;
2896 	mqd->cp_hqd_dequeue_request = 0;
2897 	mqd->cp_hqd_pq_rptr = 0;
2898 	mqd->cp_hqd_pq_wptr_lo = 0;
2899 	mqd->cp_hqd_pq_wptr_hi = 0;
2900 
2901 	/* set the pointer to the MQD */
2902 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2903 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2904 
2905 	/* set MQD vmid to 0 */
2906 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2907 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2908 	mqd->cp_mqd_control = tmp;
2909 
2910 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2911 	hqd_gpu_addr = ring->gpu_addr >> 8;
2912 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2913 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2914 
2915 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2916 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2917 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2918 			    (order_base_2(ring->ring_size / 4) - 1));
2919 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2920 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2921 #ifdef __BIG_ENDIAN
2922 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2923 #endif
2924 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2925 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2926 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2927 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2928 	mqd->cp_hqd_pq_control = tmp;
2929 
2930 	/* set the wb address whether it's enabled or not */
2931 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2932 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2933 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2934 		upper_32_bits(wb_gpu_addr) & 0xffff;
2935 
2936 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2937 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2938 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2939 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2940 
2941 	tmp = 0;
2942 	/* enable the doorbell if requested */
2943 	if (ring->use_doorbell) {
2944 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2945 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946 				DOORBELL_OFFSET, ring->doorbell_index);
2947 
2948 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949 					 DOORBELL_EN, 1);
2950 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2951 					 DOORBELL_SOURCE, 0);
2952 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2953 					 DOORBELL_HIT, 0);
2954 	}
2955 
2956 	mqd->cp_hqd_pq_doorbell_control = tmp;
2957 
2958 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2959 	ring->wptr = 0;
2960 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2961 
2962 	/* set the vmid for the queue */
2963 	mqd->cp_hqd_vmid = 0;
2964 
2965 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2966 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2967 	mqd->cp_hqd_persistent_state = tmp;
2968 
2969 	/* set MIN_IB_AVAIL_SIZE */
2970 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2971 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2972 	mqd->cp_hqd_ib_control = tmp;
2973 
2974 	/* activate the queue */
2975 	mqd->cp_hqd_active = 1;
2976 
2977 	return 0;
2978 }
2979 
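/* Program the KIQ HQD registers directly from the prepared MQD image.
 * Callers select the target me/pipe/queue with soc15_grbm_select() and
 * hold srbm_mutex around this call (see gfx_v9_0_kiq_init_queue()).
 */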
2980 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2981 {
2982 	struct amdgpu_device *adev = ring->adev;
2983 	struct v9_mqd *mqd = ring->mqd_ptr;
2984 	int j;
2985 
2986 	/* disable wptr polling */
2987 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2988 
2989 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2990 	       mqd->cp_hqd_eop_base_addr_lo);
2991 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2992 	       mqd->cp_hqd_eop_base_addr_hi);
2993 
2994 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2995 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2996 	       mqd->cp_hqd_eop_control);
2997 
2998 	/* program the doorbell control from the MQD */
2999 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3000 	       mqd->cp_hqd_pq_doorbell_control);
3001 
3002 	/* disable the queue if it's active */
3003 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3004 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3005 		for (j = 0; j < adev->usec_timeout; j++) {
3006 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3007 				break;
3008 			udelay(1);
3009 		}
3010 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3011 		       mqd->cp_hqd_dequeue_request);
3012 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3013 		       mqd->cp_hqd_pq_rptr);
3014 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3015 		       mqd->cp_hqd_pq_wptr_lo);
3016 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3017 		       mqd->cp_hqd_pq_wptr_hi);
3018 	}
3019 
3020 	/* set the pointer to the MQD */
3021 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3022 	       mqd->cp_mqd_base_addr_lo);
3023 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3024 	       mqd->cp_mqd_base_addr_hi);
3025 
3026 	/* set MQD vmid to 0 */
3027 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3028 	       mqd->cp_mqd_control);
3029 
3030 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3031 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3032 	       mqd->cp_hqd_pq_base_lo);
3033 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3034 	       mqd->cp_hqd_pq_base_hi);
3035 
3036 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3037 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3038 	       mqd->cp_hqd_pq_control);
3039 
3040 	/* set the wb address whether it's enabled or not */
3041 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3042 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3043 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3044 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3045 
3046 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3047 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3048 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3049 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3050 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3051 
3052 	/* set the MEC doorbell range when doorbells are in use */
3053 	if (ring->use_doorbell) {
3054 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3055 					(adev->doorbell_index.kiq * 2) << 2);
3056 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3057 					(adev->doorbell_index.userqueue_end * 2) << 2);
3058 	}
3059 
3060 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3061 	       mqd->cp_hqd_pq_doorbell_control);
3062 
3063 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3064 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3065 	       mqd->cp_hqd_pq_wptr_lo);
3066 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3067 	       mqd->cp_hqd_pq_wptr_hi);
3068 
3069 	/* set the vmid for the queue */
3070 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3071 
3072 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3073 	       mqd->cp_hqd_persistent_state);
3074 
3075 	/* activate the queue */
3076 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3077 	       mqd->cp_hqd_active);
3078 
3079 	if (ring->use_doorbell)
3080 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3081 
3082 	return 0;
3083 }
3084 
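/* Deactivate the KIQ HQD: request a dequeue, wait for CP_HQD_ACTIVE to
 * clear (forcing the queue off if the request times out), then zero the
 * HQD state registers so the queue is left in a clean, inactive state.
 */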
3085 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3086 {
3087 	struct amdgpu_device *adev = ring->adev;
3088 	int j;
3089 
3090 	/* disable the queue if it's active */
3091 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3092 
3093 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3094 
3095 		for (j = 0; j < adev->usec_timeout; j++) {
3096 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3097 				break;
3098 			udelay(1);
3099 		}
3100 
3101 		if (j == adev->usec_timeout) {
3102 			DRM_DEBUG("KIQ dequeue request failed.\n");
3103 
3104 			/* Manual disable if dequeue request times out */
3105 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3106 		}
3107 
3108 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3109 		      0);
3110 	}
3111 
3112 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3113 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3114 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3115 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3116 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3117 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3118 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3119 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3120 
3121 	return 0;
3122 }
3123 
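/* Set up the KIQ: on first init build the MQD, program the HQD registers
 * and keep a backup copy of the MQD; on GPU reset restore the MQD from
 * that backup and reprogram the registers without rebuilding it.
 */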
3124 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3125 {
3126 	struct amdgpu_device *adev = ring->adev;
3127 	struct v9_mqd *mqd = ring->mqd_ptr;
3128 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3129 
3130 	gfx_v9_0_kiq_setting(ring);
3131 
3132 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3133 		/* reset MQD to a clean status */
3134 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3135 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3136 
3137 		/* reset ring buffer */
3138 		ring->wptr = 0;
3139 		amdgpu_ring_clear_ring(ring);
3140 
3141 		mutex_lock(&adev->srbm_mutex);
3142 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3143 		gfx_v9_0_kiq_init_register(ring);
3144 		soc15_grbm_select(adev, 0, 0, 0, 0);
3145 		mutex_unlock(&adev->srbm_mutex);
3146 	} else {
3147 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3148 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3149 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3150 		mutex_lock(&adev->srbm_mutex);
3151 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3152 		gfx_v9_0_mqd_init(ring);
3153 		gfx_v9_0_kiq_init_register(ring);
3154 		soc15_grbm_select(adev, 0, 0, 0, 0);
3155 		mutex_unlock(&adev->srbm_mutex);
3156 
3157 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3158 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3159 	}
3160 
3161 	return 0;
3162 }
3163 
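/* Set up a compute (KCQ) MQD.  Unlike the KIQ, the HQD registers are not
 * written here; the queue is mapped later through the KIQ in
 * gfx_v9_0_kiq_kcq_enable().
 */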
3164 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3165 {
3166 	struct amdgpu_device *adev = ring->adev;
3167 	struct v9_mqd *mqd = ring->mqd_ptr;
3168 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3169 
3170 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3171 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3172 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3173 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3174 		mutex_lock(&adev->srbm_mutex);
3175 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3176 		gfx_v9_0_mqd_init(ring);
3177 		soc15_grbm_select(adev, 0, 0, 0, 0);
3178 		mutex_unlock(&adev->srbm_mutex);
3179 
3180 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3181 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3182 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3183 		/* reset MQD to a clean status */
3184 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3185 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3186 
3187 		/* reset ring buffer */
3188 		ring->wptr = 0;
3189 		amdgpu_ring_clear_ring(ring);
3190 	} else {
3191 		amdgpu_ring_clear_ring(ring);
3192 	}
3193 
3194 	return 0;
3195 }
3196 
3197 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3198 {
3199 	struct amdgpu_ring *ring;
3200 	int r;
3201 
3202 	ring = &adev->gfx.kiq.ring;
3203 
3204 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3205 	if (unlikely(r != 0))
3206 		return r;
3207 
3208 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3209 	if (unlikely(r != 0))
3210 		return r;
3211 
3212 	gfx_v9_0_kiq_init_queue(ring);
3213 	amdgpu_bo_kunmap(ring->mqd_obj);
3214 	ring->mqd_ptr = NULL;
3215 	amdgpu_bo_unreserve(ring->mqd_obj);
3216 	ring->sched.ready = true;
3217 	return 0;
3218 }
3219 
3220 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3221 {
3222 	struct amdgpu_ring *ring = NULL;
3223 	int r = 0, i;
3224 
3225 	gfx_v9_0_cp_compute_enable(adev, true);
3226 
3227 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3228 		ring = &adev->gfx.compute_ring[i];
3229 
3230 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3231 		if (unlikely(r != 0))
3232 			goto done;
3233 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3234 		if (!r) {
3235 			r = gfx_v9_0_kcq_init_queue(ring);
3236 			amdgpu_bo_kunmap(ring->mqd_obj);
3237 			ring->mqd_ptr = NULL;
3238 		}
3239 		amdgpu_bo_unreserve(ring->mqd_obj);
3240 		if (r)
3241 			goto done;
3242 	}
3243 
3244 	r = gfx_v9_0_kiq_kcq_enable(adev);
3245 done:
3246 	return r;
3247 }
3248 
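/* Bring up the command processor: load the CP microcode when the PSP is
 * not handling firmware loading, resume the KIQ, GFX and KCQ rings, and
 * run a ring test on each of them.
 */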
3249 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3250 {
3251 	int r, i;
3252 	struct amdgpu_ring *ring;
3253 
3254 	if (!(adev->flags & AMD_IS_APU))
3255 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3256 
3257 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3258 		/* legacy firmware loading */
3259 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
3260 		if (r)
3261 			return r;
3262 
3263 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3264 		if (r)
3265 			return r;
3266 	}
3267 
3268 	r = gfx_v9_0_kiq_resume(adev);
3269 	if (r)
3270 		return r;
3271 
3272 	r = gfx_v9_0_cp_gfx_resume(adev);
3273 	if (r)
3274 		return r;
3275 
3276 	r = gfx_v9_0_kcq_resume(adev);
3277 	if (r)
3278 		return r;
3279 
3280 	ring = &adev->gfx.gfx_ring[0];
3281 	r = amdgpu_ring_test_helper(ring);
3282 	if (r)
3283 		return r;
3284 
3285 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3286 		ring = &adev->gfx.compute_ring[i];
3287 		amdgpu_ring_test_helper(ring);
3288 	}
3289 
3290 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3291 
3292 	return 0;
3293 }
3294 
3295 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3296 {
3297 	gfx_v9_0_cp_gfx_enable(adev, enable);
3298 	gfx_v9_0_cp_compute_enable(adev, enable);
3299 }
3300 
3301 static int gfx_v9_0_hw_init(void *handle)
3302 {
3303 	int r;
3304 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3305 
3306 	gfx_v9_0_init_golden_registers(adev);
3307 
3308 	gfx_v9_0_constants_init(adev);
3309 
3310 	r = gfx_v9_0_csb_vram_pin(adev);
3311 	if (r)
3312 		return r;
3313 
3314 	r = adev->gfx.rlc.funcs->resume(adev);
3315 	if (r)
3316 		return r;
3317 
3318 	r = gfx_v9_0_cp_resume(adev);
3319 	if (r)
3320 		return r;
3321 
3322 	r = gfx_v9_0_ngg_en(adev);
3323 	if (r)
3324 		return r;
3325 
3326 	return r;
3327 }
3328 
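/* Unmap all compute queues by submitting UNMAP_QUEUES packets (with the
 * RESET_QUEUES action) on the KIQ ring, then check that the KIQ still
 * responds with a ring test.
 */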
3329 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3330 {
3331 	int r, i;
3332 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3333 
3334 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3335 	if (r)
3336 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3337 
3338 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3339 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3340 
3341 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3342 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3343 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3344 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3345 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3346 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3347 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3348 		amdgpu_ring_write(kiq_ring, 0);
3349 		amdgpu_ring_write(kiq_ring, 0);
3350 		amdgpu_ring_write(kiq_ring, 0);
3351 	}
3352 	r = amdgpu_ring_test_helper(kiq_ring);
3353 	if (r)
3354 		DRM_ERROR("KCQ disable failed\n");
3355 
3356 	return r;
3357 }
3358 
3359 static int gfx_v9_0_hw_fini(void *handle)
3360 {
3361 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3362 
3363 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3364 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3365 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3366 
3367 	/* disable KCQ so the CPC does not touch memory that is no longer valid */
3368 	gfx_v9_0_kcq_disable(adev);
3369 
3370 	if (amdgpu_sriov_vf(adev)) {
3371 		gfx_v9_0_cp_gfx_enable(adev, false);
3372 		/* must disable wptr polling for SR-IOV when hw teardown is done;
3373 		 * otherwise the CPC engine may keep fetching the WB address, which
3374 		 * is no longer valid after sw teardown, and trigger a DMAR read
3375 		 * error on the hypervisor side.
3376 		 */
3377 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3378 		return 0;
3379 	}
3380 
3381 	/* Use the deinitialize sequence from CAIL when unbinding the device
3382 	 * from the driver, otherwise KIQ hangs when binding back.
3383 	 */
3384 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3385 		mutex_lock(&adev->srbm_mutex);
3386 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3387 				adev->gfx.kiq.ring.pipe,
3388 				adev->gfx.kiq.ring.queue, 0);
3389 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3390 		soc15_grbm_select(adev, 0, 0, 0, 0);
3391 		mutex_unlock(&adev->srbm_mutex);
3392 	}
3393 
3394 	gfx_v9_0_cp_enable(adev, false);
3395 	adev->gfx.rlc.funcs->stop(adev);
3396 
3397 	gfx_v9_0_csb_vram_unpin(adev);
3398 
3399 	return 0;
3400 }
3401 
3402 static int gfx_v9_0_suspend(void *handle)
3403 {
3404 	return gfx_v9_0_hw_fini(handle);
3405 }
3406 
3407 static int gfx_v9_0_resume(void *handle)
3408 {
3409 	return gfx_v9_0_hw_init(handle);
3410 }
3411 
3412 static bool gfx_v9_0_is_idle(void *handle)
3413 {
3414 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3415 
3416 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3417 				GRBM_STATUS, GUI_ACTIVE))
3418 		return false;
3419 	else
3420 		return true;
3421 }
3422 
3423 static int gfx_v9_0_wait_for_idle(void *handle)
3424 {
3425 	unsigned i;
3426 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3427 
3428 	for (i = 0; i < adev->usec_timeout; i++) {
3429 		if (gfx_v9_0_is_idle(handle))
3430 			return 0;
3431 		udelay(1);
3432 	}
3433 	return -ETIMEDOUT;
3434 }
3435 
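/* Soft reset: inspect GRBM_STATUS/GRBM_STATUS2 to see which blocks (CP,
 * GFX, RLC) are busy, stop the RLC and CP, and pulse the matching bits in
 * GRBM_SOFT_RESET.
 */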
3436 static int gfx_v9_0_soft_reset(void *handle)
3437 {
3438 	u32 grbm_soft_reset = 0;
3439 	u32 tmp;
3440 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3441 
3442 	/* GRBM_STATUS */
3443 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3444 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3445 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3446 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3447 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3448 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3449 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3450 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3451 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3452 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3453 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3454 	}
3455 
3456 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3457 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3458 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3459 	}
3460 
3461 	/* GRBM_STATUS2 */
3462 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3463 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3464 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3465 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3466 
3467 
3468 	if (grbm_soft_reset) {
3469 		/* stop the rlc */
3470 		adev->gfx.rlc.funcs->stop(adev);
3471 
3472 		/* Disable GFX parsing/prefetching */
3473 		gfx_v9_0_cp_gfx_enable(adev, false);
3474 
3475 		/* Disable MEC parsing/prefetching */
3476 		gfx_v9_0_cp_compute_enable(adev, false);
3477 
3478 		if (grbm_soft_reset) {
3479 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3480 			tmp |= grbm_soft_reset;
3481 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3482 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3483 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3484 
3485 			udelay(50);
3486 
3487 			tmp &= ~grbm_soft_reset;
3488 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3489 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3490 		}
3491 
3492 		/* Wait a little for things to settle down */
3493 		udelay(50);
3494 	}
3495 	return 0;
3496 }
3497 
3498 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3499 {
3500 	uint64_t clock;
3501 
3502 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3503 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3504 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3505 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3506 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3507 	return clock;
3508 }
3509 
3510 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3511 					  uint32_t vmid,
3512 					  uint32_t gds_base, uint32_t gds_size,
3513 					  uint32_t gws_base, uint32_t gws_size,
3514 					  uint32_t oa_base, uint32_t oa_size)
3515 {
3516 	struct amdgpu_device *adev = ring->adev;
3517 
3518 	/* GDS Base */
3519 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3520 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3521 				   gds_base);
3522 
3523 	/* GDS Size */
3524 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3525 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3526 				   gds_size);
3527 
3528 	/* GWS */
3529 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3530 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3531 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3532 
3533 	/* OA */
3534 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3535 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3536 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3537 }
3538 
3539 static const u32 vgpr_init_compute_shader[] =
3540 {
3541 	0xb07c0000, 0xbe8000ff,
3542 	0x000000f8, 0xbf110800,
3543 	0x7e000280, 0x7e020280,
3544 	0x7e040280, 0x7e060280,
3545 	0x7e080280, 0x7e0a0280,
3546 	0x7e0c0280, 0x7e0e0280,
3547 	0x80808800, 0xbe803200,
3548 	0xbf84fff5, 0xbf9c0000,
3549 	0xd28c0001, 0x0001007f,
3550 	0xd28d0001, 0x0002027e,
3551 	0x10020288, 0xb8810904,
3552 	0xb7814000, 0xd1196a01,
3553 	0x00000301, 0xbe800087,
3554 	0xbefc00c1, 0xd89c4000,
3555 	0x00020201, 0xd89cc080,
3556 	0x00040401, 0x320202ff,
3557 	0x00000800, 0x80808100,
3558 	0xbf84fff8, 0x7e020280,
3559 	0xbf810000, 0x00000000,
3560 };
3561 
3562 static const u32 sgpr_init_compute_shader[] =
3563 {
3564 	0xb07c0000, 0xbe8000ff,
3565 	0x0000005f, 0xbee50080,
3566 	0xbe812c65, 0xbe822c65,
3567 	0xbe832c65, 0xbe842c65,
3568 	0xbe852c65, 0xb77c0005,
3569 	0x80808500, 0xbf84fff8,
3570 	0xbe800080, 0xbf810000,
3571 };
3572 
3573 static const struct soc15_reg_entry vgpr_init_regs[] = {
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3584 };
3585 
3586 static const struct soc15_reg_entry sgpr_init_regs[] = {
3587    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3597 };
3598 
3599 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3600    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3611    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3612    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3614    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3615    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3619    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3622    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3623    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3624    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3625    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3626    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3627    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3628    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3629    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3630    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3631    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3632 };
3633 
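/* EDC/RAS workaround: fill the whole GDS from the first compute ring with
 * a DMA_DATA packet, which should leave the ECC-protected GDS contents in
 * a known state before the counters are used.
 */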
3634 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3635 {
3636 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3637 	int i, r;
3638 
3639 	r = amdgpu_ring_alloc(ring, 7);
3640 	if (r) {
3641 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3642 			ring->name, r);
3643 		return r;
3644 	}
3645 
3646 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3647 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3648 
3649 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3650 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3651 				PACKET3_DMA_DATA_DST_SEL(1) |
3652 				PACKET3_DMA_DATA_SRC_SEL(2) |
3653 				PACKET3_DMA_DATA_ENGINE(0)));
3654 	amdgpu_ring_write(ring, 0);
3655 	amdgpu_ring_write(ring, 0);
3656 	amdgpu_ring_write(ring, 0);
3657 	amdgpu_ring_write(ring, 0);
3658 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3659 				adev->gds.gds_size);
3660 
3661 	amdgpu_ring_commit(ring);
3662 
3663 	for (i = 0; i < adev->usec_timeout; i++) {
3664 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3665 			break;
3666 		udelay(1);
3667 	}
3668 
3669 	if (i >= adev->usec_timeout)
3670 		r = -ETIMEDOUT;
3671 
3672 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3673 
3674 	return r;
3675 }
3676 
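/* EDC/RAS workaround: dispatch small VGPR and SGPR init shaders through
 * an IB on the first compute ring to initialize the GPR banks, then read
 * back the EDC counter registers to clear them.
 */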
3677 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3678 {
3679 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3680 	struct amdgpu_ib ib;
3681 	struct dma_fence *f = NULL;
3682 	int r, i, j, k;
3683 	unsigned total_size, vgpr_offset, sgpr_offset;
3684 	u64 gpu_addr;
3685 
3686 	/* only support when RAS is enabled */
3687 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3688 		return 0;
3689 
3690 	/* bail if the compute ring is not ready */
3691 	if (!ring->sched.ready)
3692 		return 0;
3693 
3694 	total_size =
3695 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3696 	total_size +=
3697 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3698 	total_size = ALIGN(total_size, 256);
3699 	vgpr_offset = total_size;
3700 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3701 	sgpr_offset = total_size;
3702 	total_size += sizeof(sgpr_init_compute_shader);
3703 
3704 	/* allocate an indirect buffer to put the commands in */
3705 	memset(&ib, 0, sizeof(ib));
3706 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3707 	if (r) {
3708 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3709 		return r;
3710 	}
3711 
3712 	/* load the compute shaders */
3713 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3714 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3715 
3716 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3717 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3718 
3719 	/* init the ib length to 0 */
3720 	ib.length_dw = 0;
3721 
3722 	/* VGPR */
3723 	/* write the register state for the compute dispatch */
3724 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3725 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3726 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3727 								- PACKET3_SET_SH_REG_START;
3728 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3729 	}
3730 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3731 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3732 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3733 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3734 							- PACKET3_SET_SH_REG_START;
3735 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3736 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3737 
3738 	/* write dispatch packet */
3739 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3740 	ib.ptr[ib.length_dw++] = 128; /* x */
3741 	ib.ptr[ib.length_dw++] = 1; /* y */
3742 	ib.ptr[ib.length_dw++] = 1; /* z */
3743 	ib.ptr[ib.length_dw++] =
3744 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3745 
3746 	/* write CS partial flush packet */
3747 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3748 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3749 
3750 	/* SGPR */
3751 	/* write the register state for the compute dispatch */
3752 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3753 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3754 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3755 								- PACKET3_SET_SH_REG_START;
3756 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3757 	}
3758 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3759 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3760 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3761 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3762 							- PACKET3_SET_SH_REG_START;
3763 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3764 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3765 
3766 	/* write dispatch packet */
3767 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3768 	ib.ptr[ib.length_dw++] = 128; /* x */
3769 	ib.ptr[ib.length_dw++] = 1; /* y */
3770 	ib.ptr[ib.length_dw++] = 1; /* z */
3771 	ib.ptr[ib.length_dw++] =
3772 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3773 
3774 	/* write CS partial flush packet */
3775 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3776 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3777 
3778 	/* schedule the ib on the ring */
3779 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3780 	if (r) {
3781 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3782 		goto fail;
3783 	}
3784 
3785 	/* wait for the GPU to finish processing the IB */
3786 	r = dma_fence_wait(f, false);
3787 	if (r) {
3788 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3789 		goto fail;
3790 	}
3791 
3792 	/* read back registers to clear the counters */
3793 	mutex_lock(&adev->grbm_idx_mutex);
3794 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3795 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3796 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3797 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3798 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3799 			}
3800 		}
3801 	}
3802 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3803 	mutex_unlock(&adev->grbm_idx_mutex);
3804 
3805 fail:
3806 	amdgpu_ib_free(adev, &ib, NULL);
3807 	dma_fence_put(f);
3808 
3809 	return r;
3810 }
3811 
3812 static int gfx_v9_0_early_init(void *handle)
3813 {
3814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3815 
3816 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3817 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3818 	gfx_v9_0_set_ring_funcs(adev);
3819 	gfx_v9_0_set_irq_funcs(adev);
3820 	gfx_v9_0_set_gds_init(adev);
3821 	gfx_v9_0_set_rlc_funcs(adev);
3822 
3823 	return 0;
3824 }
3825 
3826 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3827 		struct amdgpu_iv_entry *entry);
3828 
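/* Late RAS/ECC init: run the GDS and GPR EDC workarounds, register the GFX
 * RAS block (feature enable, IH handler, debugfs/sysfs nodes) and enable
 * the CP ECC error interrupt.  Also re-enables the RAS feature on resume.
 */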
3829 static int gfx_v9_0_ecc_late_init(void *handle)
3830 {
3831 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3833 	struct ras_ih_if ih_info = {
3834 		.cb = gfx_v9_0_process_ras_data_cb,
3835 	};
3836 	struct ras_fs_if fs_info = {
3837 		.sysfs_name = "gfx_err_count",
3838 		.debugfs_name = "gfx_err_inject",
3839 	};
3840 	struct ras_common_if ras_block = {
3841 		.block = AMDGPU_RAS_BLOCK__GFX,
3842 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3843 		.sub_block_index = 0,
3844 		.name = "gfx",
3845 	};
3846 	int r;
3847 
3848 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3849 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3850 		return 0;
3851 	}
3852 
3853 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
3854 	if (r)
3855 		return r;
3856 
3857 	/* requires IBs so do in late init after IB pool is initialized */
3858 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3859 	if (r)
3860 		return r;
3861 
3862 	/* handle resume path. */
3863 	if (*ras_if) {
3864 		/* resend the RAS TA enable cmd during resume;
3865 		 * be prepared to handle failure.
3866 		 */
3867 		ih_info.head = **ras_if;
3868 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3869 		if (r) {
3870 			if (r == -EAGAIN) {
3871 				/* request a gpu reset. will run again. */
3872 				amdgpu_ras_request_reset_on_boot(adev,
3873 						AMDGPU_RAS_BLOCK__GFX);
3874 				return 0;
3875 			}
3876 			/* fail to enable ras, cleanup all. */
3877 			goto irq;
3878 		}
3879 		/* enable successfully. continue. */
3880 		goto resume;
3881 	}
3882 
3883 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3884 	if (!*ras_if)
3885 		return -ENOMEM;
3886 
3887 	**ras_if = ras_block;
3888 
3889 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3890 	if (r) {
3891 		if (r == -EAGAIN) {
3892 			amdgpu_ras_request_reset_on_boot(adev,
3893 					AMDGPU_RAS_BLOCK__GFX);
3894 			r = 0;
3895 		}
3896 		goto feature;
3897 	}
3898 
3899 	ih_info.head = **ras_if;
3900 	fs_info.head = **ras_if;
3901 
3902 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3903 	if (r)
3904 		goto interrupt;
3905 
3906 	amdgpu_ras_debugfs_create(adev, &fs_info);
3907 
3908 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
3909 	if (r)
3910 		goto sysfs;
3911 resume:
3912 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3913 	if (r)
3914 		goto irq;
3915 
3916 	return 0;
3917 irq:
3918 	amdgpu_ras_sysfs_remove(adev, *ras_if);
3919 sysfs:
3920 	amdgpu_ras_debugfs_remove(adev, *ras_if);
3921 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3922 interrupt:
3923 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
3924 feature:
3925 	kfree(*ras_if);
3926 	*ras_if = NULL;
3927 	return r;
3928 }
3929 
3930 static int gfx_v9_0_late_init(void *handle)
3931 {
3932 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3933 	int r;
3934 
3935 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3936 	if (r)
3937 		return r;
3938 
3939 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3940 	if (r)
3941 		return r;
3942 
3943 	r = gfx_v9_0_ecc_late_init(handle);
3944 	if (r)
3945 		return r;
3946 
3947 	return 0;
3948 }
3949 
3950 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3951 {
3952 	uint32_t rlc_setting;
3953 
3954 	/* if RLC is not enabled, do nothing */
3955 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3956 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3957 		return false;
3958 
3959 	return true;
3960 }
3961 
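/* Request RLC safe mode by writing the CMD/MESSAGE fields of RLC_SAFE_MODE
 * and poll until the RLC acknowledges by clearing the CMD field.
 */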
3962 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3963 {
3964 	uint32_t data;
3965 	unsigned i;
3966 
3967 	data = RLC_SAFE_MODE__CMD_MASK;
3968 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3969 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3970 
3971 	/* wait for RLC_SAFE_MODE */
3972 	for (i = 0; i < adev->usec_timeout; i++) {
3973 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3974 			break;
3975 		udelay(1);
3976 	}
3977 }
3978 
3979 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3980 {
3981 	uint32_t data;
3982 
3983 	data = RLC_SAFE_MODE__CMD_MASK;
3984 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3985 }
3986 
3987 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3988 						bool enable)
3989 {
3990 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3991 
3992 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3993 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3994 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3995 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3996 	} else {
3997 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3998 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3999 	}
4000 
4001 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4002 }
4003 
4004 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4005 						bool enable)
4006 {
4007 	/* TODO: double check if we need to perform under safe mode */
4008 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4009 
4010 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4011 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4012 	else
4013 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4014 
4015 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4016 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4017 	else
4018 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4019 
4020 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4021 }
4022 
4023 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4024 						      bool enable)
4025 {
4026 	uint32_t data, def;
4027 
4028 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4029 
4030 	/* MGCG is disabled by HW by default */
4031 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4032 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4033 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4034 
4035 		if (adev->asic_type != CHIP_VEGA12)
4036 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4037 
4038 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4039 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4040 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4041 
4042 		/* only for Vega10 & Raven1 */
4043 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4044 
4045 		if (def != data)
4046 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4047 
4048 		/* MGLS is a global flag to control all MGLS in GFX */
4049 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4050 			/* 2 - RLC memory Light sleep */
4051 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4052 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4053 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4054 				if (def != data)
4055 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4056 			}
4057 			/* 3 - CP memory Light sleep */
4058 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4059 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4060 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4061 				if (def != data)
4062 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4063 			}
4064 		}
4065 	} else {
4066 		/* 1 - MGCG_OVERRIDE */
4067 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4068 
4069 		if (adev->asic_type != CHIP_VEGA12)
4070 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4071 
4072 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4073 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4074 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4075 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4076 
4077 		if (def != data)
4078 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4079 
4080 		/* 2 - disable MGLS in RLC */
4081 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4082 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4083 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4084 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4085 		}
4086 
4087 		/* 3 - disable MGLS in CP */
4088 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4089 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4090 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4091 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4092 		}
4093 	}
4094 
4095 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4096 }
4097 
4098 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4099 					   bool enable)
4100 {
4101 	uint32_t data, def;
4102 
4103 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4104 
4105 	/* Enable 3D CGCG/CGLS */
4106 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4107 		/* write cmd to clear cgcg/cgls ov */
4108 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4109 		/* unset CGCG override */
4110 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4111 		/* update CGCG and CGLS override bits */
4112 		if (def != data)
4113 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4114 
4115 		/* enable 3D cgcg FSM (0x0000363f) */
4116 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4117 
4118 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4119 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4120 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4121 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4122 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4123 		if (def != data)
4124 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4125 
4126 		/* set IDLE_POLL_COUNT(0x00900100) */
4127 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4128 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4129 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4130 		if (def != data)
4131 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4132 	} else {
4133 		/* Disable CGCG/CGLS */
4134 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4135 		/* disable cgcg, cgls should be disabled */
4136 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4137 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4138 		/* disable cgcg and cgls in FSM */
4139 		if (def != data)
4140 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4141 	}
4142 
4143 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4144 }
4145 
4146 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4147 						      bool enable)
4148 {
4149 	uint32_t def, data;
4150 
4151 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4152 
4153 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4154 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4155 		/* unset CGCG override */
4156 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4157 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4158 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4159 		else
4160 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4161 		/* update CGCG and CGLS override bits */
4162 		if (def != data)
4163 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4164 
4165 		/* enable cgcg FSM (0x0000363F) */
4166 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4167 
4168 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4169 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4170 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4171 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4172 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4173 		if (def != data)
4174 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4175 
4176 		/* set IDLE_POLL_COUNT(0x00900100) */
4177 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4178 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4179 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4180 		if (def != data)
4181 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4182 	} else {
4183 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4184 		/* reset CGCG/CGLS bits */
4185 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4186 		/* disable cgcg and cgls in FSM */
4187 		if (def != data)
4188 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4189 	}
4190 
4191 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4192 }
4193 
4194 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4195 					    bool enable)
4196 {
4197 	if (enable) {
4198 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4199 		 * ===  MGCG + MGLS ===
4200 		 */
4201 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4202 		/* ===  CGCG /CGLS for GFX 3D Only === */
4203 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4204 		/* ===  CGCG + CGLS === */
4205 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4206 	} else {
4207 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4208 		 * ===  CGCG + CGLS ===
4209 		 */
4210 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4211 		/* ===  CGCG /CGLS for GFX 3D Only === */
4212 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4213 		/* ===  MGCG + MGLS === */
4214 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4215 	}
4216 	return 0;
4217 }
4218 
4219 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4220 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4221 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4222 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4223 	.init = gfx_v9_0_rlc_init,
4224 	.get_csb_size = gfx_v9_0_get_csb_size,
4225 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4226 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4227 	.resume = gfx_v9_0_rlc_resume,
4228 	.stop = gfx_v9_0_rlc_stop,
4229 	.reset = gfx_v9_0_rlc_reset,
4230 	.start = gfx_v9_0_rlc_start
4231 };
4232 
4233 static int gfx_v9_0_set_powergating_state(void *handle,
4234 					  enum amd_powergating_state state)
4235 {
4236 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4237 	bool enable = (state == AMD_PG_STATE_GATE);
4238 
4239 	switch (adev->asic_type) {
4240 	case CHIP_RAVEN:
4241 		if (!enable) {
4242 			amdgpu_gfx_off_ctrl(adev, false);
4243 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4244 		}
4245 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4246 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4247 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4248 		} else {
4249 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4250 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4251 		}
4252 
4253 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4254 			gfx_v9_0_enable_cp_power_gating(adev, true);
4255 		else
4256 			gfx_v9_0_enable_cp_power_gating(adev, false);
4257 
4258 		/* update gfx cgpg state */
4259 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4260 
4261 		/* update mgcg state */
4262 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4263 
4264 		if (enable)
4265 			amdgpu_gfx_off_ctrl(adev, true);
4266 		break;
4267 	case CHIP_VEGA12:
4268 		if (!enable) {
4269 			amdgpu_gfx_off_ctrl(adev, false);
4270 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4271 		} else {
4272 			amdgpu_gfx_off_ctrl(adev, true);
4273 		}
4274 		break;
4275 	default:
4276 		break;
4277 	}
4278 
4279 	return 0;
4280 }
4281 
4282 static int gfx_v9_0_set_clockgating_state(void *handle,
4283 					  enum amd_clockgating_state state)
4284 {
4285 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4286 
4287 	if (amdgpu_sriov_vf(adev))
4288 		return 0;
4289 
4290 	switch (adev->asic_type) {
4291 	case CHIP_VEGA10:
4292 	case CHIP_VEGA12:
4293 	case CHIP_VEGA20:
4294 	case CHIP_RAVEN:
4295 		gfx_v9_0_update_gfx_clock_gating(adev,
4296 						 state == AMD_CG_STATE_GATE);
4297 		break;
4298 	default:
4299 		break;
4300 	}
4301 	return 0;
4302 }
4303 
4304 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4305 {
4306 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4307 	int data;
4308 
4309 	if (amdgpu_sriov_vf(adev))
4310 		*flags = 0;
4311 
4312 	/* AMD_CG_SUPPORT_GFX_MGCG */
4313 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4314 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4315 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4316 
4317 	/* AMD_CG_SUPPORT_GFX_CGCG */
4318 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4319 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4320 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4321 
4322 	/* AMD_CG_SUPPORT_GFX_CGLS */
4323 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4324 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4325 
4326 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4327 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4328 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4329 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4330 
4331 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4332 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4333 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4334 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4335 
4336 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4337 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4338 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4339 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4340 
4341 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4342 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4343 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4344 }
4345 
4346 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4347 {
4348 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4349 }
4350 
4351 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4352 {
4353 	struct amdgpu_device *adev = ring->adev;
4354 	u64 wptr;
4355 
4356 	/* XXX check if swapping is necessary on BE */
4357 	if (ring->use_doorbell) {
4358 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4359 	} else {
4360 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4361 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4362 	}
4363 
4364 	return wptr;
4365 }
4366 
4367 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4368 {
4369 	struct amdgpu_device *adev = ring->adev;
4370 
4371 	if (ring->use_doorbell) {
4372 		/* XXX check if swapping is necessary on BE */
4373 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4374 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4375 	} else {
4376 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4377 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4378 	}
4379 }
4380 
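/* Emit an HDP flush: pick the NBIO ref/mask bits for this ring's CP engine
 * (MEC pipe or PFP) and wait on the HDP flush request/done offsets via
 * gfx_v9_0_wait_reg_mem().
 */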
4381 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4382 {
4383 	struct amdgpu_device *adev = ring->adev;
4384 	u32 ref_and_mask, reg_mem_engine;
4385 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4386 
4387 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4388 		switch (ring->me) {
4389 		case 1:
4390 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4391 			break;
4392 		case 2:
4393 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4394 			break;
4395 		default:
4396 			return;
4397 		}
4398 		reg_mem_engine = 0;
4399 	} else {
4400 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4401 		reg_mem_engine = 1; /* pfp */
4402 	}
4403 
4404 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4405 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4406 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4407 			      ref_and_mask, ref_and_mask, 0x20);
4408 }
4409 
4410 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4411 					struct amdgpu_job *job,
4412 					struct amdgpu_ib *ib,
4413 					uint32_t flags)
4414 {
4415 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4416 	u32 header, control = 0;
4417 
4418 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4419 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4420 	else
4421 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4422 
4423 	control |= ib->length_dw | (vmid << 24);
4424 
4425 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4426 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4427 
4428 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4429 			gfx_v9_0_ring_emit_de_meta(ring);
4430 	}
4431 
4432 	amdgpu_ring_write(ring, header);
4433 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4434 	amdgpu_ring_write(ring,
4435 #ifdef __BIG_ENDIAN
4436 		(2 << 0) |
4437 #endif
4438 		lower_32_bits(ib->gpu_addr));
4439 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4440 	amdgpu_ring_write(ring, control);
4441 }
4442 
4443 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4444 					  struct amdgpu_job *job,
4445 					  struct amdgpu_ib *ib,
4446 					  uint32_t flags)
4447 {
4448 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4449 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4450 
4451 	/* Currently, there is a high possibility to get wave ID mismatch
4452 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4453 	 * different wave IDs than the GDS expects. This situation happens
4454 	 * randomly when at least 5 compute pipes use GDS ordered append.
4455 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4456 	 * Those are probably bugs somewhere else in the kernel driver.
4457 	 *
4458 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4459 	 * GDS to 0 for this ring (me/pipe).
4460 	 */
4461 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4462 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4463 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4464 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4465 	}
4466 
4467 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4468 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4469 	amdgpu_ring_write(ring,
4470 #ifdef __BIG_ENDIAN
4471 				(2 << 0) |
4472 #endif
4473 				lower_32_bits(ib->gpu_addr));
4474 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4475 	amdgpu_ring_write(ring, control);
4476 }
4477 
4478 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4479 				     u64 seq, unsigned flags)
4480 {
4481 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4482 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4483 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4484 
4485 	/* RELEASE_MEM - flush caches, send int */
4486 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4487 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4488 					       EOP_TC_NC_ACTION_EN) :
4489 					      (EOP_TCL1_ACTION_EN |
4490 					       EOP_TC_ACTION_EN |
4491 					       EOP_TC_WB_ACTION_EN |
4492 					       EOP_TC_MD_ACTION_EN)) |
4493 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4494 				 EVENT_INDEX(5)));
4495 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4496 
4497 	/*
4498 	 * the address should be Qword aligned for a 64bit write, and Dword
4499 	 * aligned if we only send the low 32 bits (data high is discarded)
4500 	 */
4501 	if (write64bit)
4502 		BUG_ON(addr & 0x7);
4503 	else
4504 		BUG_ON(addr & 0x3);
4505 	amdgpu_ring_write(ring, lower_32_bits(addr));
4506 	amdgpu_ring_write(ring, upper_32_bits(addr));
4507 	amdgpu_ring_write(ring, lower_32_bits(seq));
4508 	amdgpu_ring_write(ring, upper_32_bits(seq));
4509 	amdgpu_ring_write(ring, 0);
4510 }
4511 
4512 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4513 {
4514 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4515 	uint32_t seq = ring->fence_drv.sync_seq;
4516 	uint64_t addr = ring->fence_drv.gpu_addr;
4517 
4518 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4519 			      lower_32_bits(addr), upper_32_bits(addr),
4520 			      seq, 0xffffffff, 4);
4521 }
4522 
4523 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4524 					unsigned vmid, uint64_t pd_addr)
4525 {
4526 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4527 
4528 	/* compute doesn't have PFP */
4529 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4530 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4531 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4532 		amdgpu_ring_write(ring, 0x0);
4533 	}
4534 }
4535 
4536 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4537 {
4538 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4539 }
4540 
4541 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4542 {
4543 	u64 wptr;
4544 
4545 	/* XXX check if swapping is necessary on BE */
4546 	if (ring->use_doorbell)
4547 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4548 	else
4549 		BUG();
4550 	return wptr;
4551 }
4552 
4553 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4554 					   bool acquire)
4555 {
4556 	struct amdgpu_device *adev = ring->adev;
4557 	int pipe_num, tmp, reg;
4558 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4559 
4560 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4561 
4562 	/* first me only has 2 entries, GFX and HP3D */
4563 	if (ring->me > 0)
4564 		pipe_num -= 2;
4565 
4566 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4567 	tmp = RREG32(reg);
4568 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4569 	WREG32(reg, tmp);
4570 }
4571 
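/* Track which me/pipe pairs hold a high-priority reservation in
 * pipe_reserve_bitmap and adjust SPI_WCL_PIPE_PERCENT accordingly: with no
 * reservations every pipe runs at full rate, otherwise unreserved pipes
 * are throttled to a minimal percentage.
 */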
4572 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4573 					    struct amdgpu_ring *ring,
4574 					    bool acquire)
4575 {
4576 	int i, pipe;
4577 	bool reserve;
4578 	struct amdgpu_ring *iring;
4579 
4580 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4581 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4582 	if (acquire)
4583 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4584 	else
4585 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4586 
4587 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4588 		/* Clear all reservations - everyone reacquires all resources */
4589 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4590 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4591 						       true);
4592 
4593 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4594 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4595 						       true);
4596 	} else {
4597 		/* Lower all pipes without a current reservation */
4598 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4599 			iring = &adev->gfx.gfx_ring[i];
4600 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4601 							   iring->me,
4602 							   iring->pipe,
4603 							   0);
4604 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4605 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4606 		}
4607 
4608 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4609 			iring = &adev->gfx.compute_ring[i];
4610 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4611 							   iring->me,
4612 							   iring->pipe,
4613 							   0);
4614 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4615 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4616 		}
4617 	}
4618 
4619 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4620 }
4621 
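/*
 * Select the ring's me/pipe/queue through SRBM and program the HQD pipe and
 * queue priority registers: elevated values while the priority is held, zero
 * once it is released.
 */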
4622 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4623 				      struct amdgpu_ring *ring,
4624 				      bool acquire)
4625 {
4626 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4627 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4628 
4629 	mutex_lock(&adev->srbm_mutex);
4630 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4631 
4632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4633 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4634 
4635 	soc15_grbm_select(adev, 0, 0, 0, 0);
4636 	mutex_unlock(&adev->srbm_mutex);
4637 }
4638 
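/*
 * Compute rings only: when the DRM scheduler priority is HIGH_HW, raise the
 * HQD priority and reserve the pipe's resources; drop both otherwise.
 */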
4639 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4640 					       enum drm_sched_priority priority)
4641 {
4642 	struct amdgpu_device *adev = ring->adev;
4643 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4644 
4645 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4646 		return;
4647 
4648 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4649 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4650 }
4651 
4652 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4653 {
4654 	struct amdgpu_device *adev = ring->adev;
4655 
4656 	/* XXX check if swapping is necessary on BE */
4657 	if (ring->use_doorbell) {
4658 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4659 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4660 	} else {
4661 		BUG(); /* only DOORBELL method supported on gfx9 now */
4662 	}
4663 }
4664 
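/*
 * KIQ fences are limited to 32 bits: WRITE_DATA the low 32 bits of @seq to
 * the writeback address and, if an interrupt was requested, write
 * CPC_INT_STATUS to raise it.
 */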
4665 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4666 					 u64 seq, unsigned int flags)
4667 {
4668 	struct amdgpu_device *adev = ring->adev;
4669 
4670 	/* we only allocate 32bit for each seq wb address */
4671 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4672 
4673 	/* write fence seq to the "addr" */
4674 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4675 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4676 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4677 	amdgpu_ring_write(ring, lower_32_bits(addr));
4678 	amdgpu_ring_write(ring, upper_32_bits(addr));
4679 	amdgpu_ring_write(ring, lower_32_bits(seq));
4680 
4681 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4682 		/* set register to trigger INT */
4683 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4684 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4685 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4686 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4687 		amdgpu_ring_write(ring, 0);
4688 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4689 	}
4690 }
4691 
4692 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4693 {
4694 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4695 	amdgpu_ring_write(ring, 0);
4696 }
4697 
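/*
 * Write a zeroed v9_ce_ib_state into the CE payload slot of the CSA using the
 * CE engine.  Only emitted under SR-IOV (see gfx_v9_ring_emit_cntxcntl).
 */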
4698 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4699 {
4700 	struct v9_ce_ib_state ce_payload = {0};
4701 	uint64_t csa_addr;
4702 	int cnt;
4703 
4704 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4705 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4706 
4707 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4708 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4709 				 WRITE_DATA_DST_SEL(8) |
4710 				 WR_CONFIRM) |
4711 				 WRITE_DATA_CACHE_POLICY(0));
4712 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4713 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4714 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4715 }
4716 
4717 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4718 {
4719 	struct v9_de_ib_state de_payload = {0};
4720 	uint64_t csa_addr, gds_addr;
4721 	int cnt;
4722 
4723 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4724 	gds_addr = csa_addr + 4096;
4725 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4726 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4727 
4728 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4729 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4730 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4731 				 WRITE_DATA_DST_SEL(8) |
4732 				 WR_CONFIRM) |
4733 				 WRITE_DATA_CACHE_POLICY(0));
4734 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4735 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4736 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4737 }
4738 
4739 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4740 {
4741 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4742 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_start(0) or frame_end(1) */
4743 }
4744 
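/*
 * Build the CONTEXT_CONTROL packet: load_enable is always set; on a context
 * switch the global/uconfig/per-context/shader register loads are requested,
 * and CE RAM is loaded whenever a preamble IB is present.  Under SR-IOV the
 * CE metadata is written first.
 */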
4745 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4746 {
4747 	uint32_t dw2 = 0;
4748 
4749 	if (amdgpu_sriov_vf(ring->adev))
4750 		gfx_v9_0_ring_emit_ce_meta(ring);
4751 
4752 	gfx_v9_0_ring_emit_tmz(ring, true);
4753 
4754 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4755 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4756 		/* set load_global_config & load_global_uconfig */
4757 		dw2 |= 0x8001;
4758 		/* set load_cs_sh_regs */
4759 		dw2 |= 0x01000000;
4760 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4761 		dw2 |= 0x10002;
4762 
4763 		/* set load_ce_ram if preamble presented */
4764 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4765 			dw2 |= 0x10000000;
4766 	} else {
4767 		/* still load_ce_ram if this is the first time the preamble is presented,
4768 		 * even though no context switch happens.
4769 		 */
4770 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4771 			dw2 |= 0x10000000;
4772 	}
4773 
4774 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4775 	amdgpu_ring_write(ring, dw2);
4776 	amdgpu_ring_write(ring, 0);
4777 }
4778 
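/*
 * Emit a COND_EXEC packet whose dword count is not known yet; the 0x55aa55aa
 * placeholder is patched later by gfx_v9_0_ring_emit_patch_cond_exec once the
 * size of the conditional section is known.  Returns the ring offset of the
 * placeholder.
 */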
4779 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4780 {
4781 	unsigned ret;
4782 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4783 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4784 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4785 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
4786 	ret = ring->wptr & ring->buf_mask;
4787 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4788 	return ret;
4789 }
4790 
4791 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4792 {
4793 	unsigned cur;
4794 	BUG_ON(offset > ring->buf_mask);
4795 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4796 
4797 	cur = (ring->wptr & ring->buf_mask) - 1;
4798 	if (likely(cur > offset))
4799 		ring->ring[offset] = cur - offset;
4800 	else
4801 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4802 }
4803 
4804 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4805 {
4806 	struct amdgpu_device *adev = ring->adev;
4807 
4808 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4809 	amdgpu_ring_write(ring, 0 |	/* src: register */
4810 				(5 << 8) |	/* dst: memory */
4811 				(1 << 20));	/* write confirm */
4812 	amdgpu_ring_write(ring, reg);
4813 	amdgpu_ring_write(ring, 0);
4814 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4815 				adev->virt.reg_val_offs * 4));
4816 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4817 				adev->virt.reg_val_offs * 4));
4818 }
4819 
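/*
 * Emit a register write via WRITE_DATA.  Gfx rings issue it from engine 1
 * with write confirmation, the KIQ sets the no-address-increment bit, and
 * all other rings simply request write confirmation.
 */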
4820 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4821 				    uint32_t val)
4822 {
4823 	uint32_t cmd = 0;
4824 
4825 	switch (ring->funcs->type) {
4826 	case AMDGPU_RING_TYPE_GFX:
4827 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4828 		break;
4829 	case AMDGPU_RING_TYPE_KIQ:
4830 		cmd = (1 << 16); /* no inc addr */
4831 		break;
4832 	default:
4833 		cmd = WR_CONFIRM;
4834 		break;
4835 	}
4836 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4837 	amdgpu_ring_write(ring, cmd);
4838 	amdgpu_ring_write(ring, reg);
4839 	amdgpu_ring_write(ring, 0);
4840 	amdgpu_ring_write(ring, val);
4841 }
4842 
4843 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4844 					uint32_t val, uint32_t mask)
4845 {
4846 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4847 }
4848 
4849 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4850 						  uint32_t reg0, uint32_t reg1,
4851 						  uint32_t ref, uint32_t mask)
4852 {
4853 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4854 	struct amdgpu_device *adev = ring->adev;
4855 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4856 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4857 
4858 	if (fw_version_ok)
4859 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4860 				      ref, mask, 0x20);
4861 	else
4862 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4863 							   ref, mask);
4864 }
4865 
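/*
 * Soft recovery: issue an SQ_CMD targeted at @vmid (CHECK_VMID set) to kill
 * the waves of the hung job without resetting the whole GPU.
 */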
4866 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4867 {
4868 	struct amdgpu_device *adev = ring->adev;
4869 	uint32_t value = 0;
4870 
4871 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4872 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4873 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4874 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4875 	WREG32(mmSQ_CMD, value);
4876 }
4877 
4878 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4879 						 enum amdgpu_interrupt_state state)
4880 {
4881 	switch (state) {
4882 	case AMDGPU_IRQ_STATE_DISABLE:
4883 	case AMDGPU_IRQ_STATE_ENABLE:
4884 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4885 			       TIME_STAMP_INT_ENABLE,
4886 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4887 		break;
4888 	default:
4889 		break;
4890 	}
4891 }
4892 
4893 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4894 						     int me, int pipe,
4895 						     enum amdgpu_interrupt_state state)
4896 {
4897 	u32 mec_int_cntl, mec_int_cntl_reg;
4898 
4899 	/*
4900 	 * amdgpu controls only the first MEC. That's why this function only
4901 	 * handles the setting of interrupts for this specific MEC. All other
4902 	 * pipes' interrupts are set by amdkfd.
4903 	 */
4904 
4905 	if (me == 1) {
4906 		switch (pipe) {
4907 		case 0:
4908 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4909 			break;
4910 		case 1:
4911 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4912 			break;
4913 		case 2:
4914 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4915 			break;
4916 		case 3:
4917 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4918 			break;
4919 		default:
4920 			DRM_DEBUG("invalid pipe %d\n", pipe);
4921 			return;
4922 		}
4923 	} else {
4924 		DRM_DEBUG("invalid me %d\n", me);
4925 		return;
4926 	}
4927 
4928 	switch (state) {
4929 	case AMDGPU_IRQ_STATE_DISABLE:
4930 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4931 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4932 					     TIME_STAMP_INT_ENABLE, 0);
4933 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4934 		break;
4935 	case AMDGPU_IRQ_STATE_ENABLE:
4936 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4937 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4938 					     TIME_STAMP_INT_ENABLE, 1);
4939 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4940 		break;
4941 	default:
4942 		break;
4943 	}
4944 }
4945 
4946 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4947 					     struct amdgpu_irq_src *source,
4948 					     unsigned type,
4949 					     enum amdgpu_interrupt_state state)
4950 {
4951 	switch (state) {
4952 	case AMDGPU_IRQ_STATE_DISABLE:
4953 	case AMDGPU_IRQ_STATE_ENABLE:
4954 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4955 			       PRIV_REG_INT_ENABLE,
4956 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4957 		break;
4958 	default:
4959 		break;
4960 	}
4961 
4962 	return 0;
4963 }
4964 
4965 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4966 					      struct amdgpu_irq_src *source,
4967 					      unsigned type,
4968 					      enum amdgpu_interrupt_state state)
4969 {
4970 	switch (state) {
4971 	case AMDGPU_IRQ_STATE_DISABLE:
4972 	case AMDGPU_IRQ_STATE_ENABLE:
4973 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4974 			       PRIV_INSTR_INT_ENABLE,
4975 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
4976 	default:
4977 		break;
4978 	}
4979 
4980 	return 0;
4981 }
4982 
4983 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
4984 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4985 			CP_ECC_ERROR_INT_ENABLE, 1)
4986 
4987 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
4988 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4989 			CP_ECC_ERROR_INT_ENABLE, 0)
4990 
4991 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4992 					      struct amdgpu_irq_src *source,
4993 					      unsigned type,
4994 					      enum amdgpu_interrupt_state state)
4995 {
4996 	switch (state) {
4997 	case AMDGPU_IRQ_STATE_DISABLE:
4998 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4999 				CP_ECC_ERROR_INT_ENABLE, 0);
5000 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5001 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5002 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5003 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5004 		break;
5005 
5006 	case AMDGPU_IRQ_STATE_ENABLE:
5007 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5008 				CP_ECC_ERROR_INT_ENABLE, 1);
5009 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5010 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5011 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5012 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5013 		break;
5014 	default:
5015 		break;
5016 	}
5017 
5018 	return 0;
5019 }
5020 
5021 
5022 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5023 					    struct amdgpu_irq_src *src,
5024 					    unsigned type,
5025 					    enum amdgpu_interrupt_state state)
5026 {
5027 	switch (type) {
5028 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5029 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5030 		break;
5031 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5032 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5033 		break;
5034 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5035 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5036 		break;
5037 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5038 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5039 		break;
5040 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5041 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5042 		break;
5043 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5044 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5045 		break;
5046 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5047 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5048 		break;
5049 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5050 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5051 		break;
5052 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5053 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5054 		break;
5055 	default:
5056 		break;
5057 	}
5058 	return 0;
5059 }
5060 
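/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the gfx ring (me 0) or on the matching compute ring.
 */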
5061 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5062 			    struct amdgpu_irq_src *source,
5063 			    struct amdgpu_iv_entry *entry)
5064 {
5065 	int i;
5066 	u8 me_id, pipe_id, queue_id;
5067 	struct amdgpu_ring *ring;
5068 
5069 	DRM_DEBUG("IH: CP EOP\n");
5070 	me_id = (entry->ring_id & 0x0c) >> 2;
5071 	pipe_id = (entry->ring_id & 0x03) >> 0;
5072 	queue_id = (entry->ring_id & 0x70) >> 4;
5073 
5074 	switch (me_id) {
5075 	case 0:
5076 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5077 		break;
5078 	case 1:
5079 	case 2:
5080 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5081 			ring = &adev->gfx.compute_ring[i];
5082 			/* Per-queue interrupt is supported for MEC starting from VI.
5083 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5084 			 */
5085 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5086 				amdgpu_fence_process(ring);
5087 		}
5088 		break;
5089 	}
5090 	return 0;
5091 }
5092 
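/*
 * Common handler for privileged register/instruction faults: report a
 * scheduler fault on the ring identified by the IV entry's me/pipe/queue.
 */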
5093 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5094 			   struct amdgpu_iv_entry *entry)
5095 {
5096 	u8 me_id, pipe_id, queue_id;
5097 	struct amdgpu_ring *ring;
5098 	int i;
5099 
5100 	me_id = (entry->ring_id & 0x0c) >> 2;
5101 	pipe_id = (entry->ring_id & 0x03) >> 0;
5102 	queue_id = (entry->ring_id & 0x70) >> 4;
5103 
5104 	switch (me_id) {
5105 	case 0:
5106 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5107 		break;
5108 	case 1:
5109 	case 2:
5110 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5111 			ring = &adev->gfx.compute_ring[i];
5112 			if (ring->me == me_id && ring->pipe == pipe_id &&
5113 			    ring->queue == queue_id)
5114 				drm_sched_fault(&ring->sched);
5115 		}
5116 		break;
5117 	}
5118 }
5119 
5120 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5121 				 struct amdgpu_irq_src *source,
5122 				 struct amdgpu_iv_entry *entry)
5123 {
5124 	DRM_ERROR("Illegal register access in command stream\n");
5125 	gfx_v9_0_fault(adev, entry);
5126 	return 0;
5127 }
5128 
5129 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5130 				  struct amdgpu_irq_src *source,
5131 				  struct amdgpu_iv_entry *entry)
5132 {
5133 	DRM_ERROR("Illegal instruction in command stream\n");
5134 	gfx_v9_0_fault(adev, entry);
5135 	return 0;
5136 }
5137 
5138 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5139 		struct amdgpu_iv_entry *entry)
5140 {
5141 	/* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5142 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5143 	amdgpu_ras_reset_gpu(adev, 0);
5144 	return AMDGPU_RAS_UE;
5145 }
5146 
5147 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5148 				  struct amdgpu_irq_src *source,
5149 				  struct amdgpu_iv_entry *entry)
5150 {
5151 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5152 	struct ras_dispatch_if ih_data = {
5153 		.entry = entry,
5154 	};
5155 
5156 	if (!ras_if)
5157 		return 0;
5158 
5159 	ih_data.head = *ras_if;
5160 
5161 	DRM_ERROR("CP ECC ERROR IRQ\n");
5162 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5163 	return 0;
5164 }
5165 
5166 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5167 	.name = "gfx_v9_0",
5168 	.early_init = gfx_v9_0_early_init,
5169 	.late_init = gfx_v9_0_late_init,
5170 	.sw_init = gfx_v9_0_sw_init,
5171 	.sw_fini = gfx_v9_0_sw_fini,
5172 	.hw_init = gfx_v9_0_hw_init,
5173 	.hw_fini = gfx_v9_0_hw_fini,
5174 	.suspend = gfx_v9_0_suspend,
5175 	.resume = gfx_v9_0_resume,
5176 	.is_idle = gfx_v9_0_is_idle,
5177 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5178 	.soft_reset = gfx_v9_0_soft_reset,
5179 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5180 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5181 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5182 };
5183 
5184 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5185 	.type = AMDGPU_RING_TYPE_GFX,
5186 	.align_mask = 0xff,
5187 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5188 	.support_64bit_ptrs = true,
5189 	.vmhub = AMDGPU_GFXHUB,
5190 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5191 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5192 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5193 	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
5194 		5 +  /* COND_EXEC */
5195 		7 +  /* PIPELINE_SYNC */
5196 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5197 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5198 		2 + /* VM_FLUSH */
5199 		8 +  /* FENCE for VM_FLUSH */
5200 		20 + /* GDS switch */
5201 		4 + /* double SWITCH_BUFFER,
5202 		       the first COND_EXEC jumps to the place just
5203 		       prior to this double SWITCH_BUFFER */
5204 		5 + /* COND_EXEC */
5205 		7 +	 /*	HDP_flush */
5206 		4 +	 /*	VGT_flush */
5207 		14 + /*	CE_META */
5208 		31 + /*	DE_META */
5209 		3 + /* CNTX_CTRL */
5210 		5 + /* HDP_INVL */
5211 		8 + 8 + /* FENCE x2 */
5212 		2, /* SWITCH_BUFFER */
5213 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5214 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5215 	.emit_fence = gfx_v9_0_ring_emit_fence,
5216 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5217 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5218 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5219 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5220 	.test_ring = gfx_v9_0_ring_test_ring,
5221 	.test_ib = gfx_v9_0_ring_test_ib,
5222 	.insert_nop = amdgpu_ring_insert_nop,
5223 	.pad_ib = amdgpu_ring_generic_pad_ib,
5224 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5225 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5226 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5227 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5228 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5229 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5230 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5231 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5232 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5233 };
5234 
5235 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5236 	.type = AMDGPU_RING_TYPE_COMPUTE,
5237 	.align_mask = 0xff,
5238 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5239 	.support_64bit_ptrs = true,
5240 	.vmhub = AMDGPU_GFXHUB,
5241 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5242 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5243 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5244 	.emit_frame_size =
5245 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5246 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5247 		5 + /* hdp invalidate */
5248 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5249 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5250 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5251 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5252 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5253 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5254 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5255 	.emit_fence = gfx_v9_0_ring_emit_fence,
5256 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5257 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5258 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5259 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5260 	.test_ring = gfx_v9_0_ring_test_ring,
5261 	.test_ib = gfx_v9_0_ring_test_ib,
5262 	.insert_nop = amdgpu_ring_insert_nop,
5263 	.pad_ib = amdgpu_ring_generic_pad_ib,
5264 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5265 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5266 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5267 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5268 };
5269 
5270 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5271 	.type = AMDGPU_RING_TYPE_KIQ,
5272 	.align_mask = 0xff,
5273 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5274 	.support_64bit_ptrs = true,
5275 	.vmhub = AMDGPU_GFXHUB,
5276 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5277 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5278 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5279 	.emit_frame_size =
5280 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5281 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5282 		5 + /* hdp invalidate */
5283 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5284 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5285 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5286 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5287 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5288 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5289 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5290 	.test_ring = gfx_v9_0_ring_test_ring,
5291 	.insert_nop = amdgpu_ring_insert_nop,
5292 	.pad_ib = amdgpu_ring_generic_pad_ib,
5293 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5294 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5295 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5296 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5297 };
5298 
5299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5300 {
5301 	int i;
5302 
5303 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5304 
5305 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5306 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5307 
5308 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5309 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5310 }
5311 
5312 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5313 	.set = gfx_v9_0_set_eop_interrupt_state,
5314 	.process = gfx_v9_0_eop_irq,
5315 };
5316 
5317 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5318 	.set = gfx_v9_0_set_priv_reg_fault_state,
5319 	.process = gfx_v9_0_priv_reg_irq,
5320 };
5321 
5322 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5323 	.set = gfx_v9_0_set_priv_inst_fault_state,
5324 	.process = gfx_v9_0_priv_inst_irq,
5325 };
5326 
5327 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5328 	.set = gfx_v9_0_set_cp_ecc_error_state,
5329 	.process = gfx_v9_0_cp_ecc_error_irq,
5330 };
5331 
5332 
5333 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5334 {
5335 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5336 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5337 
5338 	adev->gfx.priv_reg_irq.num_types = 1;
5339 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5340 
5341 	adev->gfx.priv_inst_irq.num_types = 1;
5342 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5343 
5344 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5345 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5346 }
5347 
5348 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5349 {
5350 	switch (adev->asic_type) {
5351 	case CHIP_VEGA10:
5352 	case CHIP_VEGA12:
5353 	case CHIP_VEGA20:
5354 	case CHIP_RAVEN:
5355 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5356 		break;
5357 	default:
5358 		break;
5359 	}
5360 }
5361 
5362 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5363 {
5364 	/* init asic gds info */
5365 	switch (adev->asic_type) {
5366 	case CHIP_VEGA10:
5367 	case CHIP_VEGA12:
5368 	case CHIP_VEGA20:
5369 		adev->gds.gds_size = 0x10000;
5370 		break;
5371 	case CHIP_RAVEN:
5372 		adev->gds.gds_size = 0x1000;
5373 		break;
5374 	default:
5375 		adev->gds.gds_size = 0x10000;
5376 		break;
5377 	}
5378 
5379 	switch (adev->asic_type) {
5380 	case CHIP_VEGA10:
5381 	case CHIP_VEGA20:
5382 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5383 		break;
5384 	case CHIP_VEGA12:
5385 		adev->gds.gds_compute_max_wave_id = 0x27f;
5386 		break;
5387 	case CHIP_RAVEN:
5388 		if (adev->rev_id >= 0x8)
5389 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5390 		else
5391 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5392 		break;
5393 	default:
5394 		/* this really depends on the chip */
5395 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5396 		break;
5397 	}
5398 
5399 	adev->gds.gws_size = 64;
5400 	adev->gds.oa_size = 16;
5401 }
5402 
5403 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5404 						 u32 bitmap)
5405 {
5406 	u32 data;
5407 
5408 	if (!bitmap)
5409 		return;
5410 
5411 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5412 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5413 
5414 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5415 }
5416 
5417 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5418 {
5419 	u32 data, mask;
5420 
5421 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5422 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5423 
5424 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5425 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5426 
5427 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5428 
5429 	return (~data) & mask;
5430 }
5431 
5432 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5433 				 struct amdgpu_cu_info *cu_info)
5434 {
5435 	int i, j, k, counter, active_cu_number = 0;
5436 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5437 	unsigned disable_masks[4 * 2];
5438 
5439 	if (!adev || !cu_info)
5440 		return -EINVAL;
5441 
5442 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5443 
5444 	mutex_lock(&adev->grbm_idx_mutex);
5445 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5446 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5447 			mask = 1;
5448 			ao_bitmap = 0;
5449 			counter = 0;
5450 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5451 			if (i < 4 && j < 2)
5452 				gfx_v9_0_set_user_cu_inactive_bitmap(
5453 					adev, disable_masks[i * 2 + j]);
5454 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5455 			cu_info->bitmap[i][j] = bitmap;
5456 
5457 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5458 				if (bitmap & mask) {
5459 					if (counter < adev->gfx.config.max_cu_per_sh)
5460 						ao_bitmap |= mask;
5461 					counter++;
5462 				}
5463 				mask <<= 1;
5464 			}
5465 			active_cu_number += counter;
5466 			if (i < 2 && j < 2)
5467 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5468 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5469 		}
5470 	}
5471 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5472 	mutex_unlock(&adev->grbm_idx_mutex);
5473 
5474 	cu_info->number = active_cu_number;
5475 	cu_info->ao_cu_mask = ao_cu_mask;
5476 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5477 
5478 	return 0;
5479 }
5480 
5481 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5482 {
5483 	.type = AMD_IP_BLOCK_TYPE_GFX,
5484 	.major = 9,
5485 	.minor = 0,
5486 	.rev = 0,
5487 	.funcs = &gfx_v9_0_ip_funcs,
5488 };
5489